diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -6,6 +6,17 @@ // //===----------------------------------------------------------------------===// // Syntax tree nodes for C, C++ and Objective-C grammar constructs. +// +// Nodes provide access to their syntactic components, e.g. IfStatement provides +// a way to get its condition, then and else branches, tokens for 'if' and +// 'else' keywords. +// When using the accessors, please assume they can return null. This happens +// because: +// - the corresponding subnode is optional in the C++ grammar, e.g. an else +// branch of an if statement, +// - syntactic errors occurred while parsing the corresponding subnode. +// One notable exception is "introducer" keywords, e.g. the accessor for the +// 'if' keyword of an if statement will never return null. //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H @@ -17,31 +28,70 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/raw_ostream.h" - namespace clang { namespace syntax { -/// A kind of a syntax node, used for implementing casts. +/// A kind of a syntax node, used for implementing casts. The ordering and +/// blocks of enumerator constants must correspond to the inheritance hierarchy +/// of syntax::Node. enum class NodeKind : uint16_t { Leaf, TranslationUnit, TopLevelDeclaration, + + // Expressions + UnknownExpression, + + // Statements + UnknownStatement, + DeclarationStatement, + EmptyStatement, + SwitchStatement, + CaseStatement, + DefaultStatement, + IfStatement, + ForStatement, + WhileStatement, + ContinueStatement, + BreakStatement, + ReturnStatement, + RangeBasedForStatement, + ExpressionStatement, CompoundStatement }; /// For debugging purposes. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K); -/// A relation between a parent and child node. Used for implementing accessors. +/// A relation between a parent and child node, e.g. 'left-hand-side of +/// a binary expression'. Used for implementing accessors. enum class NodeRole : uint8_t { - // A node without a parent. + // Roles common to multiple node kinds. + /// A node without a parent Detached, - // Children of an unknown semantic nature, e.g. skipped tokens, comments. + /// Children of an unknown semantic nature, e.g. skipped tokens, comments. Unknown, - // FIXME: should this be shared for all other nodes with braces, e.g. init - // lists? - CompoundStatement_lbrace, - CompoundStatement_rbrace + /// An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc. + OpenParen, + /// A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc. + CloseParen, + /// A keywords that introduces some grammar construct, e.g. 'if', 'try', etc. + IntroducerKeyword, + /// An inner statement for those that have only a single child of kind + /// statement, e.g. loop body for while, for, etc; inner statement for case, + /// default, etc. + BodyStatement, + + // Roles specific to particular node kinds. + CaseStatement_value, + IfStatement_thenStatement, + IfStatement_elseKeyword, + IfStatement_elseStatement, + ReturnStatement_value, + ExpressionStatement_expression, + CompoundStatement_statement }; +/// For debugging purposes. +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeRole R); /// A root node for a translation unit. Parent is always null. class TranslationUnit final : public Tree { @@ -66,16 +116,190 @@ } }; +/// A base class for all expressions. Note that expressions are not statements, +/// even though they are in clang. +class Expression : public Tree { +public: + Expression(NodeKind K) : Tree(K) {} + static bool classof(const Node *N) { + return NodeKind::UnknownExpression <= N->kind() && + N->kind() <= NodeKind::UnknownExpression; + } +}; + +/// An expression of an unknown kind, i.e. one not currently handled by the +/// syntax tree. +class UnknownExpression final : public Expression { +public: + UnknownExpression() : Expression(NodeKind::UnknownExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownExpression; + } +}; + /// An abstract node for C++ statements, e.g. 'while', 'if', etc. +/// FIXME: add accessors for semicolon of statements that have it. class Statement : public Tree { public: Statement(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return NodeKind::CompoundStatement <= N->kind() && + return NodeKind::UnknownStatement <= N->kind() && N->kind() <= NodeKind::CompoundStatement; } }; +/// A statement of an unknown kind, i.e. one not currently handled by the syntax +/// tree. +class UnknownStatement final : public Statement { +public: + UnknownStatement() : Statement(NodeKind::UnknownStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownStatement; + } +}; + +/// E.g. 'int a, b = 10;' +class DeclarationStatement final : public Statement { +public: + DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::DeclarationStatement; + } +}; + +/// The no-op statement, i.e. ';'. +class EmptyStatement final : public Statement { +public: + EmptyStatement() : Statement(NodeKind::EmptyStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::EmptyStatement; + } +}; + +/// switch () +class SwitchStatement final : public Statement { +public: + SwitchStatement() : Statement(NodeKind::SwitchStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::SwitchStatement; + } + syntax::Leaf *switchKeyword(); + syntax::Statement *body(); +}; + +/// case : +class CaseStatement final : public Statement { +public: + CaseStatement() : Statement(NodeKind::CaseStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::CaseStatement; + } + syntax::Leaf *caseKeyword(); + syntax::Expression *value(); + syntax::Statement *body(); +}; + +/// default: +class DefaultStatement final : public Statement { +public: + DefaultStatement() : Statement(NodeKind::DefaultStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::DefaultStatement; + } + syntax::Leaf *defaultKeyword(); + syntax::Statement *body(); +}; + +/// if (cond) else +/// FIXME: add condition that models 'expression or variable declaration' +class IfStatement final : public Statement { +public: + IfStatement() : Statement(NodeKind::IfStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::IfStatement; + } + syntax::Leaf *ifKeyword(); + syntax::Statement *thenStatement(); + syntax::Leaf *elseKeyword(); + syntax::Statement *elseStatement(); +}; + +/// for (; ; ) +class ForStatement final : public Statement { +public: + ForStatement() : Statement(NodeKind::ForStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ForStatement; + } + syntax::Leaf *forKeyword(); + syntax::Statement *body(); +}; + +/// while () +class WhileStatement final : public Statement { +public: + WhileStatement() : Statement(NodeKind::WhileStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::WhileStatement; + } + syntax::Leaf *whileKeyword(); + syntax::Statement *body(); +}; + +/// continue; +class ContinueStatement final : public Statement { +public: + ContinueStatement() : Statement(NodeKind::ContinueStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ContinueStatement; + } + syntax::Leaf *continueKeyword(); +}; + +/// break; +class BreakStatement final : public Statement { +public: + BreakStatement() : Statement(NodeKind::BreakStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::BreakStatement; + } + syntax::Leaf *breakKeyword(); +}; + +/// return ; +/// return; +class ReturnStatement final : public Statement { +public: + ReturnStatement() : Statement(NodeKind::ReturnStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ReturnStatement; + } + syntax::Leaf *returnKeyword(); + syntax::Expression *value(); +}; + +/// for ( : ) +class RangeBasedForStatement final : public Statement { +public: + RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::RangeBasedForStatement; + } + syntax::Leaf *forKeyword(); + syntax::Statement *body(); +}; + +/// Expression in a statement position, e.g. functions calls inside compound +/// statements or inside a loop body. +class ExpressionStatement final : public Statement { +public: + ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ExpressionStatement; + } + syntax::Expression *expression(); +}; + /// { statement1; statement2; … } class CompoundStatement final : public Statement { public: @@ -84,6 +308,8 @@ return N->kind() == NodeKind::CompoundStatement; } syntax::Leaf *lbrace(); + /// FIXME: use custom iterator instead of 'vector'. + std::vector statements(); syntax::Leaf *rbrace(); }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -27,6 +27,8 @@ using namespace clang; +static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -52,6 +54,15 @@ /// Range. void foldNode(llvm::ArrayRef Range, syntax::Tree *New); + /// Mark the \p Child node with a corresponding \p Role. All marked children + /// should be consumed by foldNode. + /// (!) when called on expressions (clang::Expr is derived from clang::Stmt), + /// wraps expressions into expression statement. + void markStmtChild(Stmt *Child, NodeRole Role); + /// Should be called for expressions in non-statement position to avoid + /// wrapping into expression statement. + void markExprChild(Expr *Child, NodeRole Role); + /// Set role for a token starting at \p Loc. void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); @@ -83,8 +94,23 @@ llvm::ArrayRef getRange(const Decl *D) const { return getRange(D->getBeginLoc(), D->getEndLoc()); } - llvm::ArrayRef getRange(const Stmt *S) const { - return getRange(S->getBeginLoc(), S->getEndLoc()); + llvm::ArrayRef getExprRange(const Expr *E) const { + return getRange(E->getBeginLoc(), E->getEndLoc()); + } + /// Find the adjusted range for the statement, consuming the trailing + /// semicolon when needed. + llvm::ArrayRef getStmtRange(const Stmt *S) const { + auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + if (isa(S)) + return Tokens; + + // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and + // all statements that end with those. Consume this semicolon here. + // + // (!) statements never consume 'eof', so looking at the next token is ok. + if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) + return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); + return Tokens; } private: @@ -227,16 +253,168 @@ bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole; - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, - NodeRole::CompoundStatement_lbrace); + Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen); + for (auto *Child : S->body()) + Builder.markStmtChild(Child, NodeRole::CompoundStatement_statement); Builder.markChildToken(S->getRBracLoc(), tok::r_brace, - NodeRole::CompoundStatement_rbrace); + NodeRole::CloseParen); - Builder.foldNode(Builder.getRange(S), + Builder.foldNode(Builder.getStmtRange(S), new (allocator()) syntax::CompoundStatement); return true; } + // Some statements are not yet handled by syntax trees. + bool WalkUpFromStmt(Stmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::UnknownStatement); + return true; + } + + bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { + // We override to traverse range initializer as VarDecl. + // RAV traverses it as a statement, we produce invalid node kinds in that + // case. + // FIXME: should do this in RAV instead? + if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + } + + bool TraverseStmt(Stmt *S) { + if (auto *E = llvm::dyn_cast_or_null(S)) { + // (!) do not recurse into subexpressions. + // we do not have syntax trees for expressions yet, so we only want to see + // the first top-level expression. + return WalkUpFromExpr(E->IgnoreImplicit()); + } + return RecursiveASTVisitor::TraverseStmt(S); + } + + // Some expressions are not yet handled by syntax trees. + bool WalkUpFromExpr(Expr *E) { + assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); + Builder.foldNode(Builder.getExprRange(E), + new (allocator()) syntax::UnknownExpression); + return true; + } + + // The code below is very regular, it could even be generated with some + // preprocessor magic. We merely assign roles to the corresponding children + // and fold resulting nodes. + bool WalkUpFromDeclStmt(DeclStmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::DeclarationStatement); + return true; + } + + bool WalkUpFromNullStmt(NullStmt *S) { + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::EmptyStatement); + return true; + } + + bool WalkUpFromSwitchStmt(SwitchStmt *S) { + Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::SwitchStatement); + return true; + } + + bool WalkUpFromCaseStmt(CaseStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), tok::kw_case, + syntax::NodeRole::IntroducerKeyword); + Builder.markExprChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); + Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::CaseStatement); + return true; + } + + bool WalkUpFromDefaultStmt(DefaultStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), tok::kw_default, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::DefaultStatement); + return true; + } + + bool WalkUpFromIfStmt(IfStmt *S) { + Builder.markChildToken(S->getIfLoc(), tok::kw_if, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getThen(), + syntax::NodeRole::IfStatement_thenStatement); + Builder.markChildToken(S->getElseLoc(), tok::kw_else, + syntax::NodeRole::IfStatement_elseKeyword); + Builder.markStmtChild(S->getElse(), + syntax::NodeRole::IfStatement_elseStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::IfStatement); + return true; + } + + bool WalkUpFromForStmt(ForStmt *S) { + Builder.markChildToken(S->getForLoc(), tok::kw_for, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ForStatement); + return true; + } + + bool WalkUpFromWhileStmt(WhileStmt *S) { + Builder.markChildToken(S->getWhileLoc(), tok::kw_while, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::WhileStatement); + return true; + } + + bool WalkUpFromContinueStmt(ContinueStmt *S) { + Builder.markChildToken(S->getContinueLoc(), tok::kw_continue, + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ContinueStatement); + return true; + } + + bool WalkUpFromBreakStmt(BreakStmt *S) { + Builder.markChildToken(S->getBreakLoc(), tok::kw_break, + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::BreakStatement); + return true; + } + + bool WalkUpFromReturnStmt(ReturnStmt *S) { + Builder.markChildToken(S->getReturnLoc(), tok::kw_return, + syntax::NodeRole::IntroducerKeyword); + Builder.markExprChild(S->getRetValue(), + syntax::NodeRole::ReturnStatement_value); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::ReturnStatement); + return true; + } + + bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { + Builder.markChildToken(S->getForLoc(), tok::kw_for, + syntax::NodeRole::IntroducerKeyword); + Builder.markStmtChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getStmtRange(S), + new (allocator()) syntax::RangeBasedForStatement); + return true; + } + private: /// A small helper to save some typing. llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } @@ -258,6 +436,26 @@ Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markStmtChild(Stmt *Child, NodeRole Role) { + if (!Child) + return; + + auto Range = getStmtRange(Child); + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + if (auto *E = dyn_cast(Child)) { + Pending.assignRole(getExprRange(E), + NodeRole::ExpressionStatement_expression); + // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. + Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); + } + Pending.assignRole(Range, Role); +} + +void syntax::TreeBuilder::markExprChild(Expr *Child, NodeRole Role) { + Pending.assignRole(getExprRange(Child), Role); +} + const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { auto Tokens = Arena.tokenBuffer().expandedTokens(); auto &SM = Arena.sourceManager(); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,18 +18,199 @@ return OS << "TranslationUnit"; case NodeKind::TopLevelDeclaration: return OS << "TopLevelDeclaration"; + case NodeKind::UnknownExpression: + return OS << "UnknownExpression"; + case NodeKind::UnknownStatement: + return OS << "UnknownStatement"; + case NodeKind::DeclarationStatement: + return OS << "DeclarationStatement"; + case NodeKind::EmptyStatement: + return OS << "EmptyStatement"; + case NodeKind::SwitchStatement: + return OS << "SwitchStatement"; + case NodeKind::CaseStatement: + return OS << "CaseStatement"; + case NodeKind::DefaultStatement: + return OS << "DefaultStatement"; + case NodeKind::IfStatement: + return OS << "IfStatement"; + case NodeKind::ForStatement: + return OS << "ForStatement"; + case NodeKind::WhileStatement: + return OS << "WhileStatement"; + case NodeKind::ContinueStatement: + return OS << "ContinueStatement"; + case NodeKind::BreakStatement: + return OS << "BreakStatement"; + case NodeKind::ReturnStatement: + return OS << "ReturnStatement"; + case NodeKind::RangeBasedForStatement: + return OS << "RangeBasedForStatement"; + case NodeKind::ExpressionStatement: + return OS << "ExpressionStatement"; case NodeKind::CompoundStatement: return OS << "CompoundStatement"; } llvm_unreachable("unknown node kind"); } +llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { + switch (R) { + case syntax::NodeRole::Detached: + return OS << "Detached"; + case syntax::NodeRole::Unknown: + return OS << "Unknown"; + case syntax::NodeRole::OpenParen: + return OS << "OpenParen"; + case syntax::NodeRole::CloseParen: + return OS << "CloseParen"; + case syntax::NodeRole::IntroducerKeyword: + return OS << "IntroducerKeyword"; + case syntax::NodeRole::BodyStatement: + return OS << "BodyStatement"; + case syntax::NodeRole::CaseStatement_value: + return OS << "CaseStatement_value"; + case syntax::NodeRole::IfStatement_thenStatement: + return OS << "IfStatement_thenStatement"; + case syntax::NodeRole::IfStatement_elseKeyword: + return OS << "IfStatement_elseKeyword"; + case syntax::NodeRole::IfStatement_elseStatement: + return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::ReturnStatement_value: + return OS << "ReturnStatement_value"; + case syntax::NodeRole::ExpressionStatement_expression: + return OS << "ExpressionStatement_expression"; + case syntax::NodeRole::CompoundStatement_statement: + return OS << "CompoundStatement_statement"; + } + llvm_unreachable("invalid role"); +} + +syntax::Leaf *syntax::SwitchStatement::switchKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::SwitchStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::CaseStatement::caseKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::CaseStatement::value() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::CaseStatement_value)); +} + +syntax::Statement *syntax::CaseStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::DefaultStatement::defaultKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::DefaultStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::IfStatement::ifKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::IfStatement::thenStatement() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IfStatement_thenStatement)); +} + +syntax::Leaf *syntax::IfStatement::elseKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IfStatement_elseKeyword)); +} + +syntax::Statement *syntax::IfStatement::elseStatement() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IfStatement_elseStatement)); +} + +syntax::Leaf *syntax::ForStatement::forKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::ForStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::WhileStatement::whileKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::WhileStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::ContinueStatement::continueKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::BreakStatement::breakKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ReturnStatement::returnKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::ReturnStatement::value() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::ReturnStatement_value)); +} + +syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::RangeBasedForStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Expression *syntax::ExpressionStatement::expression() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::ExpressionStatement_expression)); +} + syntax::Leaf *syntax::CompoundStatement::lbrace() { return llvm::cast_or_null( - findChild(NodeRole::CompoundStatement_lbrace)); + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector syntax::CompoundStatement::statements() { + std::vector Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::CompoundStatement_statement) + Children.push_back(llvm::cast(C)); + } + return Children; } syntax::Leaf *syntax::CompoundStatement::rbrace() { return llvm::cast_or_null( - findChild(NodeRole::CompoundStatement_rbrace)); + findChild(syntax::NodeRole::CloseParen)); } diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -85,13 +85,10 @@ static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector IndentMask) { - if (N->role() != syntax::NodeRole::Unknown) { - // FIXME: print the symbolic name of a role. - if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - else - OS << static_cast(N->role()) << ": "; - } + if (N->role() == syntax::NodeRole::Detached) + OS << "*: "; + // FIXME: find a nice way to print other roles. + if (auto *L = llvm::dyn_cast(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); OS << "\n"; diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -41,8 +41,8 @@ void HandleTranslationUnit(ASTContext &Ctx) override { Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), - std::move(*Tokens).consume()); + Ctx.getLangOpts(), + std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. Root = syntax::buildSyntaxTree(*Arena, *Ctx.getTranslationUnitDecl()); } @@ -65,7 +65,7 @@ auto Tokens = std::make_unique(CI.getPreprocessor()); return std::make_unique(Root, Arena, - std::move(Tokens)); + std::move(Tokens)); } private: @@ -136,18 +136,315 @@ | |-( | |-) | `-CompoundStatement -| |-2: { -| `-3: } +| |-{ +| `-} `-TopLevelDeclaration |-void |-foo |-( |-) `-CompoundStatement - |-2: { - `-3: } + |-{ + `-} )txt"}, - }; + // if. + { + R"cpp( +int main() { + if (true) {} + if (true) {} else if (false) {} +} + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-int + |-main + |-( + |-) + `-CompoundStatement + |-{ + |-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-true + | |-) + | `-CompoundStatement + | |-{ + | `-} + |-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-true + | |-) + | |-CompoundStatement + | | |-{ + | | `-} + | |-else + | `-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-false + | |-) + | `-CompoundStatement + | |-{ + | `-} + `-} + )txt"}, + // for. + {R"cpp( +void test() { + for (;;) {} +} +)cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-ForStatement + | |-for + | |-( + | |-; + | |-; + | |-) + | `-CompoundStatement + | |-{ + | `-} + `-} + )txt"}, + // declaration statement. + {"void test() { int a = 10; }", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-DeclarationStatement + | |-int + | |-a + | |-= + | |-10 + | `-; + `-} +)txt"}, + {"void test() { ; }", R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-EmptyStatement + | `-; + `-} +)txt"}, + // switch, case and default. + {R"cpp( +void test() { + switch (true) { + case 0: + default:; + } +} +)cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-SwitchStatement + | |-switch + | |-( + | |-UnknownExpression + | | `-true + | |-) + | `-CompoundStatement + | |-{ + | |-CaseStatement + | | |-case + | | |-UnknownExpression + | | | `-0 + | | |-: + | | `-DefaultStatement + | | |-default + | | |-: + | | `-EmptyStatement + | | `-; + | `-} + `-} +)txt"}, + // while. + {R"cpp( +void test() { + while (true) { continue; break; } +} +)cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-WhileStatement + | |-while + | |-( + | |-UnknownExpression + | | `-true + | |-) + | `-CompoundStatement + | |-{ + | |-ContinueStatement + | | |-continue + | | `-; + | |-BreakStatement + | | |-break + | | `-; + | `-} + `-} +)txt"}, + // return. + {R"cpp( +int test() { return 1; } + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-int + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-ReturnStatement + | |-return + | |-UnknownExpression + | | `-1 + | `-; + `-} +)txt"}, + // Range-based for. + {R"cpp( +void test() { + int a[3]; + for (int x : a) ; +} + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-DeclarationStatement + | |-int + | |-a + | |-[ + | |-3 + | |-] + | `-; + |-RangeBasedForStatement + | |-for + | |-( + | |-int + | |-x + | |-: + | |-UnknownExpression + | | `-a + | |-) + | `-EmptyStatement + | `-; + `-} + )txt"}, + // Unhandled statements should end up as 'unknown statement'. + // This example uses a 'label statement', which does not yet have a syntax + // counterpart. + {"void main() { foo: return 100; }", R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-main + |-( + |-) + `-CompoundStatement + |-{ + |-UnknownStatement + | |-foo + | |-: + | `-ReturnStatement + | |-return + | |-UnknownExpression + | | `-100 + | `-; + `-} +)txt"}, + // expressions should be wrapped in 'ExpressionStatement' when they appear + // in a statement position. + {R"cpp( +void test() { + test(); + if (true) test(); else test(); +} + )cpp", + R"txt( +*: TranslationUnit +`-TopLevelDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-ExpressionStatement + | |-UnknownExpression + | | |-test + | | |-( + | | `-) + | `-; + |-IfStatement + | |-if + | |-( + | |-UnknownExpression + | | `-true + | |-) + | |-ExpressionStatement + | | |-UnknownExpression + | | | |-test + | | | |-( + | | | `-) + | | `-; + | |-else + | `-ExpressionStatement + | |-UnknownExpression + | | |-test + | | |-( + | | `-) + | `-; + `-} +)txt"}}; for (const auto &T : Cases) { auto *Root = buildTree(T.first);