diff --git a/clang-tools-extra/pseudo/gen/Main.cpp b/clang-tools-extra/pseudo/gen/Main.cpp --- a/clang-tools-extra/pseudo/gen/Main.cpp +++ b/clang-tools-extra/pseudo/gen/Main.cpp @@ -83,17 +83,19 @@ #ifndef NONTERMINAL #define NONTERMINAL(X, Y) #endif +#ifndef RULE +#define RULE(X, Y) +#endif #ifndef EXTENSION #define EXTENSION(X, Y) #endif - )cpp"; +)cpp"; for (clang::pseudo::SymbolID ID = 0; ID < G.table().Nonterminals.size(); - ++ID) { - std::string Name = G.symbolName(ID).str(); - // translation-unit -> translation_unit - std::replace(Name.begin(), Name.end(), '-', '_'); - Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", Name, ID); - } + ++ID) + Out.os() << llvm::formatv("NONTERMINAL({0}, {1})\n", G.mangleSymbol(ID), + ID); + for (clang::pseudo::RuleID RID = 0; RID < G.table().Rules.size(); ++RID) + Out.os() << llvm::formatv("RULE({0}, {1})\n", G.mangleRule(RID), RID); for (clang::pseudo::ExtensionID EID = 1 /*skip the sentinel 0 value*/; EID < G.table().AttributeValues.size(); ++EID) { llvm::StringRef Name = G.table().AttributeValues[EID]; @@ -102,8 +104,9 @@ } Out.os() << R"cpp( #undef NONTERMINAL +#undef RULE #undef EXTENSION - )cpp"; +)cpp"; break; case EmitGrammarContent: for (llvm::StringRef Line : llvm::split(GrammarText, '\n')) { diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/cxx/CXX.h @@ -37,6 +37,12 @@ #undef NONTERMINAL }; +enum class Rule : RuleID { +#define RULE(X, Y) X = Y, +#include "CXXSymbols.inc" +#undef RULE +}; + enum class Extension : ExtensionID { #define EXTENSION(X, Y) X = Y, #include "CXXSymbols.inc" diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h +++ 
b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h @@ -165,6 +165,21 @@ // Terminals have names like "," (kw_comma) or "OPERATOR" (kw_operator). llvm::StringRef symbolName(SymbolID) const; + // Gets the mangled name for a terminal/nonterminal. + // Compared to names in the grammar, + // nonterminal `ptr-declarator` becomes `ptr_declarator`; + // terminal `,` becomes `comma`; + // terminal `IDENTIFIER` becomes `identifier`; + // terminal `INT` becomes `int`; + // NOTE: for nonterminals, the mangled name is the same as the cxx::Symbol + // enum class; for terminals, we deliberately stripped the `kw_` prefix in + // favor of simplicity. + std::string mangleSymbol(SymbolID) const; + // Gets the mangled name for the rule. + // E.g. for the grammar rule `ptr-declarator := ptr-operator ptr-declarator`, + // it is `ptr_declarator_0ptr_operator_1ptr_declarator`. + std::string mangleRule(RuleID) const; + // Lookup the SymbolID of the nonterminal symbol by Name. llvm::Optional findNonterminal(llvm::StringRef Name) const; diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp --- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp @@ -45,6 +45,28 @@ return T->Nonterminals[SID].Name; } +std::string Grammar::mangleSymbol(SymbolID SID) const { + static const char *const TokNames[] = { +#define TOK(X) #X, +#define KEYWORD(X, Y) #X, +#include "clang/Basic/TokenKinds.def" + nullptr}; + if (clang::pseudo::isToken(SID)) + return TokNames[clang::pseudo::symbolToToken(SID)]; + std::string Name = symbolName(SID).str(); + // translation-unit -> translation_unit + std::replace(Name.begin(), Name.end(), '-', '_'); + return Name; +} + +std::string Grammar::mangleRule(RuleID RID) const { + const auto &R = lookupRule(RID); + std::string MangleName = mangleSymbol(R.Target); + for (size_t I = 0; I < R.seq().size(); ++I) + MangleName += llvm::formatv("_{0}{1}", I, 
mangleSymbol(R.seq()[I])); + return MangleName; +} + llvm::Optional Grammar::findNonterminal(llvm::StringRef Name) const { auto It = llvm::partition_point( T->Nonterminals, diff --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp --- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp @@ -114,6 +114,21 @@ EXPECT_NE(G.lookupRule(ruleFor("x")).Guard, G.lookupRule(ruleFor("y")).Guard); } +TEST_F(GrammarTest, MangleName) { + build(R"bnf( + _ := declaration + + declaration := ptr-declarator ; + ptr-declarator := * IDENTIFIER + + )bnf"); + ASSERT_TRUE(Diags.empty()); + EXPECT_EQ(G.mangleRule(ruleFor("declaration")), + "declaration_0ptr_declarator_1semi"); + EXPECT_EQ(G.mangleRule(ruleFor("ptr-declarator")), + "ptr_declarator_0star_1identifier"); +} + TEST_F(GrammarTest, Diagnostics) { build(R"cpp( _ := ,_opt