diff --git a/clang/include/clang/Tooling/Syntax/CMakeLists.txt b/clang/include/clang/Tooling/Syntax/CMakeLists.txt
--- a/clang/include/clang/Tooling/Syntax/CMakeLists.txt
+++ b/clang/include/clang/Tooling/Syntax/CMakeLists.txt
@@ -2,3 +2,6 @@
   SOURCE Nodes.td
   TARGET ClangSyntaxNodeList)
 
+clang_tablegen(NodeClasses.inc -gen-clang-syntax-node-classes
+  SOURCE Nodes.td
+  TARGET ClangSyntaxNodeClasses)
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h
--- a/clang/include/clang/Tooling/Syntax/Nodes.h
+++ b/clang/include/clang/Tooling/Syntax/Nodes.h
@@ -114,67 +114,7 @@
 /// For debugging purposes.
 raw_ostream &operator<<(raw_ostream &OS, NodeRole R);
 
-class SimpleDeclarator;
-
-/// A root node for a translation unit. Parent is always null.
-class TranslationUnit final : public Tree {
-public:
-  TranslationUnit() : Tree(NodeKind::TranslationUnit) {}
-  static bool classof(const Node *N);
-};
-
-/// A base class for all expressions. Note that expressions are not statements,
-/// even though they are in clang.
-class Expression : public Tree {
-public:
-  Expression(NodeKind K) : Tree(K) {}
-  static bool classof(const Node *N);
-};
-
-/// A sequence of these specifiers make a `nested-name-specifier`.
-/// e.g. the `std` or `vector` in `std::vector::size`.
-class NameSpecifier : public Tree {
-public:
-  NameSpecifier(NodeKind K) : Tree(K) {}
-  static bool classof(const Node *N);
-};
-
-/// The global namespace name specifier, this specifier doesn't correspond to a
-/// token instead an absence of tokens before a `::` characterizes it, in
-/// `::std::vector` it would be characterized by the absence of a token
-/// before the first `::`
-class GlobalNameSpecifier final : public NameSpecifier {
-public:
-  GlobalNameSpecifier() : NameSpecifier(NodeKind::GlobalNameSpecifier) {}
-  static bool classof(const Node *N);
-};
-
-/// A name specifier holding a decltype, of the form: `decltype ( expression ) `
-/// e.g. the `decltype(s)` in `decltype(s)::size`.
-class DecltypeNameSpecifier final : public NameSpecifier {
-public:
-  DecltypeNameSpecifier() : NameSpecifier(NodeKind::DecltypeNameSpecifier) {}
-  static bool classof(const Node *N);
-};
-
-/// A identifier name specifier, of the form `identifier`
-/// e.g. the `std` in `std::vector::size`.
-class IdentifierNameSpecifier final : public NameSpecifier {
-public:
-  IdentifierNameSpecifier()
-      : NameSpecifier(NodeKind::IdentifierNameSpecifier) {}
-  static bool classof(const Node *N);
-};
-
-/// A name specifier with a simple-template-id, of the form `template_opt
-/// identifier < template-args >` e.g. the `vector` in
-/// `std::vector::size`.
-class SimpleTemplateNameSpecifier final : public NameSpecifier {
-public:
-  SimpleTemplateNameSpecifier()
-      : NameSpecifier(NodeKind::SimpleTemplateNameSpecifier) {}
-  static bool classof(const Node *N);
-};
+#include "clang/Tooling/Syntax/NodeClasses.inc"
 
 /// Models a `nested-name-specifier`. C++ [expr.prim.id.qual]
 /// e.g. the `std::vector::` in `std::vector::size`.
diff --git a/clang/include/clang/Tooling/Syntax/Nodes.td b/clang/include/clang/Tooling/Syntax/Nodes.td
--- a/clang/include/clang/Tooling/Syntax/Nodes.td
+++ b/clang/include/clang/Tooling/Syntax/Nodes.td
@@ -17,13 +17,18 @@
 
 include "clang/Tooling/Syntax/Syntax.td"
 
-def Node : External {}
-def Leaf : External {}
-def Tree : External {}
+def TranslationUnit : Unconstrained {
+  let documentation = [{
+    A root node for a translation unit. Parent is always null.
+  }];
+}
 
-def TranslationUnit : External {}
-
-def Expression : External {}
+def Expression : Alternatives {
+  let documentation = [{
+    A base class for all expressions. Note that expressions are not statements,
+    even though they are in clang.
+  }];
+}
 def UnknownExpression : External {}
 def UnaryOperatorExpression : External {}
 def PrefixUnaryOperatorExpression : External {}
@@ -99,8 +104,36 @@
 def NestedNameSpecifier : External {}
 
 // Name Specifiers.
-def NameSpecifier : External {}
-def GlobalNameSpecifier : External {}
-def DecltypeNameSpecifier : External {}
-def IdentifierNameSpecifier : External {}
-def SimpleTemplateNameSpecifier : External {}
+def NameSpecifier : Alternatives {
+  let documentation = [{
+    A sequence of these specifiers makes a `nested-name-specifier`.
+    e.g. the `std` or `vector` in `std::vector::size`.
+  }];
+}
+def GlobalNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    The global namespace name specifier. This specifier doesn't correspond to a
+    token; instead, the absence of tokens before a `::` characterizes it. In
+    `::std::vector` it is characterized by the absence of a token before the
+    first `::`.
+  }];
+}
+def DecltypeNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    A name specifier holding a decltype, of the form: `decltype ( expression ) `
+    e.g. the `decltype(s)` in `decltype(s)::size`.
+  }];
+}
+def IdentifierNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    An identifier name specifier, of the form `identifier`
+    e.g. the `std` in `std::vector::size`.
+  }];
+}
+def SimpleTemplateNameSpecifier : Unconstrained<NameSpecifier> {
+  let documentation = [{
+    A name specifier with a simple-template-id, of the form `template_opt
+    identifier < template-args >` e.g. the `vector` in
+    `std::vector::size`.
+  }];
+}
diff --git a/clang/include/clang/Tooling/Syntax/Syntax.td b/clang/include/clang/Tooling/Syntax/Syntax.td
--- a/clang/include/clang/Tooling/Syntax/Syntax.td
+++ b/clang/include/clang/Tooling/Syntax/Syntax.td
@@ -31,10 +31,28 @@
 class NodeType {
   // The NodeType that this node is derived from in the Node class hierarchy.
   NodeType base = ?;
+  // Documentation for this Node subclass.
+  string documentation;
 }
 
 // A node type which is defined in Nodes.h rather than by generated code.
 // We merely specify the inheritance hierarchy here.
 class External : NodeType { let base = base_; }
 
-// FIXME: add sequence, list, and alternative archetypes.
+// Special nodes defined here.
+def Node : External {}
+def Leaf : External {}
+def Tree : External {}
+
+// An abstract node type which merely serves as a base for more specific types.
+//
+// This corresponds to an alternative rule in the grammar, such as:
+//   Statement = IfStatement | ForStatement | ...
+// Statement is modeled using Alternatives, and IfStatement.base is Statement.
+class Alternatives<NodeType base_ = Tree> : NodeType { let base = base_; }
+
+// A node type which may contain anything and has no specific accessors.
+// These are generally placeholders for a more precise implementation.
+class Unconstrained<NodeType base_ = Tree> : NodeType { let base = base_; }
+
+// FIXME: add sequence and list archetypes.
diff --git a/clang/utils/TableGen/ClangSyntaxEmitter.cpp b/clang/utils/TableGen/ClangSyntaxEmitter.cpp
--- a/clang/utils/TableGen/ClangSyntaxEmitter.cpp
+++ b/clang/utils/TableGen/ClangSyntaxEmitter.cpp
@@ -16,19 +16,25 @@
 //   ABSTRACT_NODE(Type, Base, FirstKind, LastKind)
 // similar to those for AST nodes such as AST/DeclNodes.inc.
 //
-// In future, the class definitions will be produced by additional backends.
+// The -gen-clang-syntax-node-classes backend produces definitions for the
+// syntax::Node subclasses (except those marked as External).
+//
+// In future, another backend will encode the structure of the various node
+// types in tables so their invariants can be checked and enforced.
 //
 //===----------------------------------------------------------------------===//
 #include "TableGenBackends.h"
 
 #include 
 
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/TableGen/Record.h"
 #include "llvm/TableGen/TableGenBackend.h"
 
 namespace {
+using llvm::formatv;
 
 // The class hierarchy of Node types.
 // We assemble this in order to be able to define the NodeKind enum in a
@@ -41,10 +47,15 @@
     for (llvm::Record *Derived : Records.getAllDerivedDefinitions("NodeType"))
       if (llvm::Record *Base = Derived->getValueAsOptionalDef("base"))
         link(Derived, Base);
-    for (NodeType &N : AllTypes)
+    for (NodeType &N : AllTypes) {
       llvm::sort(N.Derived, [](const NodeType *L, const NodeType *R) {
         return L->Record->getName() < R->Record->getName();
       });
+      // Only Alternatives and External nodes may have subclasses...
+      assert(N.Record->isSubClassOf("Alternatives") ||
+             N.Record->isSubClassOf("External") || N.Derived.empty());
+      assert(!N.Record->isSubClassOf("Alternatives") || !N.Derived.empty());
+    }
   }
 
   struct NodeType {
@@ -60,6 +71,16 @@
     return *NI->second;
   }
 
+  // Traverse the hierarchy in pre-order (base classes before derived).
+  void visit(llvm::function_ref<void(const NodeType &)> CB,
+             const NodeType *Start = nullptr) {
+    if (Start == nullptr)
+      Start = &get();
+    CB(*Start);
+    for (const NodeType *D : Start->Derived)
+      visit(CB, D);
+  }
+
 private:
   void add(const llvm::Record *R) {
     AllTypes.emplace_back();
@@ -87,26 +108,12 @@
   return N.Derived.empty() ? N : lastConcrete(*N.Derived.back());
 }
 
-void emitNodeList(const Hierarchy::NodeType &N, llvm::raw_ostream &OS) {
-  // Don't emit ABSTRACT_NODE for node itself, which has no parent.
-  if (N.Base != nullptr) {
-    if (N.Derived.empty())
-      OS << llvm::formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
-    else
-      OS << llvm::formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
-                          N.Base->name(), firstConcrete(N).name(),
-                          lastConcrete(N).name());
-  }
-  for (const auto *C : N.Derived)
-    emitNodeList(*C, OS);
-}
-
 } // namespace
 
 void clang::EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
                                     llvm::raw_ostream &OS) {
   llvm::emitSourceFileHeader("Syntax tree node list", OS);
-  OS << "// Generated from " << Records.getInputFilename() << "\n";
+  Hierarchy H(Records);
   OS << R"cpp(
 #ifndef NODE
 #define NODE(Kind, Base)
@@ -121,10 +128,79 @@
 #endif
 )cpp";
 
-  emitNodeList(Hierarchy(Records).get(), OS);
+  H.visit([&](const Hierarchy::NodeType &N) {
+    // Don't emit ABSTRACT_NODE for node itself, which has no parent.
+    if (N.Base == nullptr)
+      return;
+    if (N.Derived.empty())
+      OS << formatv("CONCRETE_NODE({0},{1})\n", N.name(), N.Base->name());
+    else
+      OS << formatv("ABSTRACT_NODE({0},{1},{2},{3})\n", N.name(),
+                    N.Base->name(), firstConcrete(N).name(),
+                    lastConcrete(N).name());
+  });
   OS << R"cpp(
 #undef NODE
 #undef CONCRETE_NODE
 #undef ABSTRACT_NODE
 )cpp";
 }
+
+// Format a documentation string as a C++ comment.
+// Documentation comes from a TableGen file, so its indentation must be fixed:
+//    documentation = [{
+//      This is a widget. Example:
+//        widget.explode()
+//    }];
+// should be formatted as:
+//    /// This is a widget. Example:
+//    ///   widget.explode()
+// Leading and trailing whitespace lines are stripped.
+// The indentation of the first line is stripped from all lines.
+static void printDoc(llvm::StringRef Doc, llvm::raw_ostream &OS) {
+  Doc = Doc.rtrim();
+  llvm::StringRef Line;
+  while (Line.trim().empty() && !Doc.empty())
+    std::tie(Line, Doc) = Doc.split('\n');
+  llvm::StringRef Indent = Line.take_while(llvm::isSpace);
+  for (; !Line.empty() || !Doc.empty(); std::tie(Line, Doc) = Doc.split('\n')) {
+    Line.consume_front(Indent);
+    OS << "/// " << Line << "\n";
+  }
+}
+
+// Emits forward declarations for every node type in the hierarchy, followed
+// by a class definition for each node that is not marked External.
+void clang::EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
+                                       llvm::raw_ostream &OS) {
+  llvm::emitSourceFileHeader("Syntax tree node classes", OS);
+  Hierarchy H(Records);
+
+  OS << "\n// Forward-declare node types so we don't have to carefully "
+        "sequence definitions.\n";
+  H.visit([&](const Hierarchy::NodeType &N) {
+    OS << "class " << N.name() << ";\n";
+  });
+
+  OS << "\n// Node definitions\n\n";
+  H.visit([&](const Hierarchy::NodeType &N) {
+    if (N.Record->isSubClassOf("External"))
+      return;
+    printDoc(N.Record->getValueAsString("documentation"), OS);
+    OS << formatv("class {0}{1} : public {2} {{\n", N.name(),
+                  N.Derived.empty() ? " final" : "", N.Base->name());
+
+    // Constructor.
+    if (N.Derived.empty())
+      OS << formatv("public:\n {0}() : {1}(NodeKind::{0}) {{}\n", N.name(),
+                    N.Base->name());
+    else
+      OS << formatv("protected:\n {0}(NodeKind K) : {1}(K) {{}\npublic:\n",
+                    N.name(), N.Base->name());
+
+    // classof. FIXME: move definition inline once ~all nodes are generated.
+    OS << " static bool classof(const Node *N);\n";
+
+    OS << "};\n\n";
+  });
+}
diff --git a/clang/utils/TableGen/TableGen.cpp b/clang/utils/TableGen/TableGen.cpp
--- a/clang/utils/TableGen/TableGen.cpp
+++ b/clang/utils/TableGen/TableGen.cpp
@@ -56,6 +56,7 @@
   GenClangOpcodes,
   GenClangSACheckers,
   GenClangSyntaxNodeList,
+  GenClangSyntaxNodeClasses,
   GenClangCommentHTMLTags,
   GenClangCommentHTMLTagsProperties,
   GenClangCommentHTMLNamedCharacterReferences,
@@ -169,6 +170,8 @@
                    "Generate Clang Static Analyzer checkers"),
         clEnumValN(GenClangSyntaxNodeList, "gen-clang-syntax-node-list",
                    "Generate list of Clang Syntax Tree node types"),
+        clEnumValN(GenClangSyntaxNodeClasses, "gen-clang-syntax-node-classes",
+                   "Generate definitions of Clang Syntax Tree node classes"),
         clEnumValN(GenClangCommentHTMLTags, "gen-clang-comment-html-tags",
                    "Generate efficient matchers for HTML tag "
                    "names that are used in documentation comments"),
@@ -362,6 +365,9 @@
   case GenClangSyntaxNodeList:
     EmitClangSyntaxNodeList(Records, OS);
     break;
+  case GenClangSyntaxNodeClasses:
+    EmitClangSyntaxNodeClasses(Records, OS);
+    break;
   case GenArmNeon:
     EmitNeon(Records, OS);
     break;
diff --git a/clang/utils/TableGen/TableGenBackends.h b/clang/utils/TableGen/TableGenBackends.h
--- a/clang/utils/TableGen/TableGenBackends.h
+++ b/clang/utils/TableGen/TableGenBackends.h
@@ -85,6 +85,8 @@
 
 void EmitClangSyntaxNodeList(llvm::RecordKeeper &Records,
                              llvm::raw_ostream &OS);
+void EmitClangSyntaxNodeClasses(llvm::RecordKeeper &Records,
+                                llvm::raw_ostream &OS);
 
 void EmitNeon(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);
 void EmitFP16(llvm::RecordKeeper &Records, llvm::raw_ostream &OS);