diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -26,22 +26,60 @@ Leaf, TranslationUnit, TopLevelDeclaration, + + // Expressions + UnknownExpression, + + // Statements + UnknownStatement, + DeclarationStatement, + EmptyStatement, + SwitchStatement, + CaseStatement, + DefaultStatement, + IfStatement, + ForStatement, + WhileStatement, + ContinueStatement, + BreakStatement, + ReturnStatement, + RangeBasedForStatement, + ExpressionStatement, CompoundStatement }; /// For debugging purposes. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeKind K); -/// A relation between a parent and child node. Used for implementing accessors. +/// A relation between a parent and child node, e.g. 'left-hand-side of a binary +/// expression'. Used for implementing accessors. enum class NodeRole : uint8_t { - // A node without a parent. + // Roles common to multiple node kinds. + /// A node without a parent. Detached, - // Children of an unknown semantic nature, e.g. skipped tokens, comments. + /// Children of an unknown semantic nature, e.g. skipped tokens, comments. Unknown, - // FIXME: should this be shared for all other nodes with braces, e.g. init - // lists? - CompoundStatement_lbrace, - CompoundStatement_rbrace + /// An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc. + OpenParen, + /// A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc. + CloseParen, + /// A keyword that introduces some grammar construct, e.g. 'if', 'try', etc. + IntroducerKeyword, + /// An inner statement for those that have only a single child of kind + /// statement, e.g. loop body for while, for, etc; inner statement for case, + /// default, etc. + BodyStatement, + + // Roles specific to particular node kinds. 
+ CaseStatement_value, + IfStatement_thenStatement, + IfStatement_elseKeyword, + IfStatement_elseStatement, + ReturnStatement_value, + ExpressionStatement_expression, + CompoundStatement_statement }; +/// For debugging purposes. +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, NodeRole R); /// A root node for a translation unit. Parent is always null. class TranslationUnit final : public Tree { @@ -66,16 +104,189 @@ } }; +/// A base class for all expressions. Note that expressions are not statements, +/// even though they are in clang. +class Expression : public Tree { +public: + Expression(NodeKind K) : Tree(K) {} + static bool classof(const Node *N) { + return NodeKind::UnknownExpression <= N->kind() && + N->kind() <= NodeKind::UnknownExpression; + } +}; + +/// An expression of an unknown kind, i.e. one not currently handled by the +/// syntax tree. +class UnknownExpression final : public Expression { +public: + UnknownExpression() : Expression(NodeKind::UnknownExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownExpression; + } +}; + /// An abstract node for C++ statements, e.g. 'while', 'if', etc. +/// FIXME: add accessors for semicolon of statements that have it. class Statement : public Tree { public: Statement(NodeKind K) : Tree(K) {} static bool classof(const Node *N) { - return NodeKind::CompoundStatement <= N->kind() && + return NodeKind::UnknownStatement <= N->kind() && N->kind() <= NodeKind::CompoundStatement; } }; +/// A statement of an unknown kind, i.e. one not currently handled by the syntax +/// tree. +class UnknownStatement final : public Statement { +public: + UnknownStatement() : Statement(NodeKind::UnknownStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownStatement; + } +}; + +/// E.g. 
'int a, b = 10;' +class DeclarationStatement final : public Statement { +public: + DeclarationStatement() : Statement(NodeKind::DeclarationStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::DeclarationStatement; + } +}; + +/// The no-op statement, i.e. ';'. +class EmptyStatement final : public Statement { +public: + EmptyStatement() : Statement(NodeKind::EmptyStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::EmptyStatement; + } +}; + +/// switch (<cond>) <body> +class SwitchStatement final : public Statement { +public: + SwitchStatement() : Statement(NodeKind::SwitchStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::SwitchStatement; + } + syntax::Leaf *switchKeyword(); + syntax::Statement *body(); +}; + +/// case <value>: <body> +class CaseStatement final : public Statement { +public: + CaseStatement() : Statement(NodeKind::CaseStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::CaseStatement; + } + syntax::Leaf *caseKeyword(); + syntax::Expression *value(); + syntax::Statement *body(); +}; + +/// default: <body> +class DefaultStatement final : public Statement { +public: + DefaultStatement() : Statement(NodeKind::DefaultStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::DefaultStatement; + } + syntax::Leaf *defaultKeyword(); + syntax::Statement *body(); +}; + +/// if (cond) <then-statement> else <else-statement> +/// FIXME: add condition that models 'expression or variable declaration' +class IfStatement final : public Statement { +public: + IfStatement() : Statement(NodeKind::IfStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::IfStatement; + } + syntax::Leaf *ifKeyword(); + syntax::Statement *thenStatement(); + syntax::Leaf *elseKeyword(); + syntax::Statement *elseStatement(); +}; + +/// for (<init>; <cond>; <increment>) <body> +class ForStatement final : public Statement { +public: + ForStatement() : Statement(NodeKind::ForStatement) {} + static bool 
classof(const Node *N) { + return N->kind() == NodeKind::ForStatement; + } + syntax::Leaf *forKeyword(); + syntax::Statement *body(); +}; + +/// while (<cond>) <body> +class WhileStatement final : public Statement { +public: + WhileStatement() : Statement(NodeKind::WhileStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::WhileStatement; + } + syntax::Leaf *whileKeyword(); + syntax::Statement *body(); +}; + +/// continue; +class ContinueStatement final : public Statement { +public: + ContinueStatement() : Statement(NodeKind::ContinueStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ContinueStatement; + } + syntax::Leaf *continueKeyword(); +}; + +/// break; +class BreakStatement final : public Statement { +public: + BreakStatement() : Statement(NodeKind::BreakStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::BreakStatement; + } + syntax::Leaf *breakKeyword(); +}; + +/// return <expr>; +class ReturnStatement final : public Statement { +public: + ReturnStatement() : Statement(NodeKind::ReturnStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ReturnStatement; + } + syntax::Leaf *returnKeyword(); + syntax::Expression *value(); +}; + +/// for (<decl> : <init>) <body> +class RangeBasedForStatement final : public Statement { +public: + RangeBasedForStatement() : Statement(NodeKind::RangeBasedForStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::RangeBasedForStatement; + } + syntax::Leaf *forKeyword(); + syntax::Statement *body(); +}; + +/// Expression in a statement position, e.g. function calls inside compound +/// statements or inside a loop body. 
+class ExpressionStatement final : public Statement { +public: + ExpressionStatement() : Statement(NodeKind::ExpressionStatement) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::ExpressionStatement; + } + syntax::Expression *expression(); +}; + /// { statement1; statement2; … } class CompoundStatement final : public Statement { public: @@ -84,6 +295,8 @@ return N->kind() == NodeKind::CompoundStatement; } syntax::Leaf *lbrace(); + /// FIXME: use custom iterator instead of 'vector'. + std::vector statements(); syntax::Leaf *rbrace(); }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -27,6 +27,8 @@ using namespace clang; +static bool isImplicitExpr(clang::Expr *E) { return E->IgnoreImplicit() != E; } + /// A helper class for constructing the syntax tree while traversing a clang /// AST. /// @@ -52,6 +54,15 @@ /// Range. void foldNode(llvm::ArrayRef Range, syntax::Tree *New); + /// Mark the \p Child node with a corresponding \p Role. All marked children + /// should be consumed by foldNode. + /// (!) this overload should only be called for expressions in a statement + /// position, it will wrap expressions into expression statement. + void markChild(Stmt *Child, NodeRole Role); + /// It is important to call this overload for expressions in non-statement + /// position to avoid wrapping into expression statement. + void markChild(Expr *Child, NodeRole Role); + /// Set role for a token starting at \p Loc. void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R); @@ -80,8 +91,23 @@ llvm::ArrayRef getRange(const Decl *D) const { return getRange(D->getBeginLoc(), D->getEndLoc()); } + llvm::ArrayRef getRange(const Expr *E) const { + return getRange(E->getBeginLoc(), E->getEndLoc()); + } + /// Find the adjusted range for the statement, consuming the trailing + /// semicolon when needed. 
llvm::ArrayRef getRange(const Stmt *S) const { - return getRange(S->getBeginLoc(), S->getEndLoc()); + auto Tokens = getRange(S->getBeginLoc(), S->getEndLoc()); + if (isa(S)) + return Tokens; + + // Some statements miss a trailing semicolon, e.g. 'return', 'continue' and + // all statements that end with those. Consume this semicolon here. + // + // (!) statements never consume 'eof', so looking at the next token is ok. + if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi) + return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1); + return Tokens; } private: @@ -223,16 +249,168 @@ bool WalkUpFromCompoundStmt(CompoundStmt *S) { using NodeRole = syntax::NodeRole; - Builder.markChildToken(S->getLBracLoc(), tok::l_brace, - NodeRole::CompoundStatement_lbrace); + Builder.markChildToken(S->getLBracLoc(), tok::l_brace, NodeRole::OpenParen); + for (auto *Child : S->body()) + Builder.markChild(Child, NodeRole::CompoundStatement_statement); Builder.markChildToken(S->getRBracLoc(), tok::r_brace, - NodeRole::CompoundStatement_rbrace); + NodeRole::CloseParen); Builder.foldNode(Builder.getRange(S), new (allocator()) syntax::CompoundStatement); return true; } + // Some statements are not yet handled by syntax trees. + bool WalkUpFromStmt(Stmt *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::UnknownStatement); + return true; + } + + bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) { + // We override to traverse range initializer as VarDecl. + // RecursiveASTVisitor traverses it as a statement, we produce invalid node kinds in that + // case. + // FIXME: should do this in RecursiveASTVisitor instead? 
+ if (S->getInit() && !TraverseStmt(S->getInit())) + return false; + if (S->getLoopVariable() && !TraverseDecl(S->getLoopVariable())) + return false; + if (S->getRangeInit() && !TraverseStmt(S->getRangeInit())) + return false; + if (S->getBody() && !TraverseStmt(S->getBody())) + return false; + return true; + } + + // Some expressions are not yet handled by syntax trees. + bool WalkUpFromExpr(Expr *E) { + assert(!isImplicitExpr(E) && "should be handled by TraverseStmt"); + Builder.foldNode(Builder.getRange(E), + new (allocator()) syntax::UnknownExpression); + return true; + } + + bool TraverseStmt(Stmt *S) { + if (auto *E = llvm::dyn_cast_or_null(S)) { + // (!) do not recurse into subexpressions. + // we do not have syntax trees for expressions yet, so we only want to see + // the first top-level expression. + return WalkUpFromExpr(E->IgnoreImplicit()); + } + return RecursiveASTVisitor::TraverseStmt(S); + } + + // The code below is very regular, it could even be generated with some + // preprocessor magic. We merely assign roles to the corresponding children + // and fold resulting nodes. 
+ bool WalkUpFromDeclStmt(DeclStmt *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::DeclarationStatement); + return true; + } + + bool WalkUpFromNullStmt(NullStmt *S) { + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::EmptyStatement); + return true; + } + + bool WalkUpFromSwitchStmt(SwitchStmt *S) { + Builder.markChildToken(S->getSwitchLoc(), tok::kw_switch, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::SwitchStatement); + return true; + } + + bool WalkUpFromCaseStmt(CaseStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), tok::kw_case, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getLHS(), syntax::NodeRole::CaseStatement_value); + Builder.markChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::CaseStatement); + return true; + } + + bool WalkUpFromDefaultStmt(DefaultStmt *S) { + Builder.markChildToken(S->getKeywordLoc(), tok::kw_default, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getSubStmt(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::DefaultStatement); + return true; + } + + bool WalkUpFromIfStmt(IfStmt *S) { + Builder.markChildToken(S->getIfLoc(), tok::kw_if, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getThen(), + syntax::NodeRole::IfStatement_thenStatement); + Builder.markChildToken(S->getElseLoc(), tok::kw_else, + syntax::NodeRole::IfStatement_elseKeyword); + Builder.markChild(S->getElse(), + syntax::NodeRole::IfStatement_elseStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::IfStatement); + return true; + } + + bool WalkUpFromForStmt(ForStmt *S) { + Builder.markChildToken(S->getForLoc(), tok::kw_for, + syntax::NodeRole::IntroducerKeyword); + 
Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::ForStatement); + return true; + } + + bool WalkUpFromWhileStmt(WhileStmt *S) { + Builder.markChildToken(S->getWhileLoc(), tok::kw_while, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::WhileStatement); + return true; + } + + bool WalkUpFromContinueStmt(ContinueStmt *S) { + Builder.markChildToken(S->getContinueLoc(), tok::kw_continue, + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::ContinueStatement); + return true; + } + + bool WalkUpFromBreakStmt(BreakStmt *S) { + Builder.markChildToken(S->getBreakLoc(), tok::kw_break, + syntax::NodeRole::IntroducerKeyword); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::BreakStatement); + return true; + } + + bool WalkUpFromReturnStmt(ReturnStmt *S) { + Builder.markChildToken(S->getReturnLoc(), tok::kw_return, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getRetValue(), + syntax::NodeRole::ReturnStatement_value); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::ReturnStatement); + return true; + } + + bool WalkUpFromCXXForRangeStmt(CXXForRangeStmt *S) { + Builder.markChildToken(S->getForLoc(), tok::kw_for, + syntax::NodeRole::IntroducerKeyword); + Builder.markChild(S->getBody(), syntax::NodeRole::BodyStatement); + Builder.foldNode(Builder.getRange(S), + new (allocator()) syntax::RangeBasedForStatement); + return true; + } + private: /// A small helper to save some typing. 
llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); } @@ -254,6 +432,29 @@ Pending.assignRole(*findToken(Loc), Role); } +void syntax::TreeBuilder::markChild(Stmt *Child, NodeRole Role) { + if (!Child) + return; + + auto Range = getRange(Child); + // This is an expression in a statement position, consume the trailing + // semicolon and form an 'ExpressionStatement' node. + if (auto *E = dyn_cast(Child)) { + Pending.assignRole(getRange(E), NodeRole::ExpressionStatement_expression); + // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. + Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); + } + Pending.assignRole(Range, Role); +} + +void syntax::TreeBuilder::markChild(Expr *Child, NodeRole Role) { + // Optional sub-expressions may be null (e.g. the value of a bare 'return;'); + // there is nothing to mark in that case. + if (!Child) + return; + Pending.assignRole(getRange(Child), Role); +} + const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const { auto Tokens = Arena.tokenBuffer().expandedTokens(); auto &SM = Arena.sourceManager(); diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -18,18 +18,199 @@ return OS << "TranslationUnit"; case NodeKind::TopLevelDeclaration: return OS << "TopLevelDeclaration"; + case NodeKind::UnknownExpression: + return OS << "UnknownExpression"; + case NodeKind::UnknownStatement: + return OS << "UnknownStatement"; + case NodeKind::DeclarationStatement: + return OS << "DeclarationStatement"; + case NodeKind::EmptyStatement: + return OS << "EmptyStatement"; + case NodeKind::SwitchStatement: + return OS << "SwitchStatement"; + case NodeKind::CaseStatement: + return OS << "CaseStatement"; + case NodeKind::DefaultStatement: + return OS << "DefaultStatement"; + case NodeKind::IfStatement: + return OS << "IfStatement"; + case NodeKind::ForStatement: + return OS << "ForStatement"; + case NodeKind::WhileStatement: + return OS << "WhileStatement"; + case NodeKind::ContinueStatement: + return OS << 
"ContinueStatement"; + case NodeKind::BreakStatement: + return OS << "BreakStatement"; + case NodeKind::ReturnStatement: + return OS << "ReturnStatement"; + case NodeKind::RangeBasedForStatement: + return OS << "RangeBasedForStatement"; + case NodeKind::ExpressionStatement: + return OS << "ExpressionStatement"; case NodeKind::CompoundStatement: return OS << "CompoundStatement"; } llvm_unreachable("unknown node kind"); } +llvm::raw_ostream &syntax::operator<<(llvm::raw_ostream &OS, NodeRole R) { + switch (R) { + case syntax::NodeRole::Detached: + return OS << "Detached"; + case syntax::NodeRole::Unknown: + return OS << "Unknown"; + case syntax::NodeRole::OpenParen: + return OS << "OpenParen"; + case syntax::NodeRole::CloseParen: + return OS << "CloseParen"; + case syntax::NodeRole::IntroducerKeyword: + return OS << "IntroducerKeyword"; + case syntax::NodeRole::BodyStatement: + return OS << "BodyStatement"; + case syntax::NodeRole::CaseStatement_value: + return OS << "CaseStatement_value"; + case syntax::NodeRole::IfStatement_thenStatement: + return OS << "IfStatement_thenStatement"; + case syntax::NodeRole::IfStatement_elseKeyword: + return OS << "IfStatement_elseKeyword"; + case syntax::NodeRole::IfStatement_elseStatement: + return OS << "IfStatement_elseStatement"; + case syntax::NodeRole::ReturnStatement_value: + return OS << "ReturnStatement_value"; + case syntax::NodeRole::ExpressionStatement_expression: + return OS << "ExpressionStatement_expression"; + case syntax::NodeRole::CompoundStatement_statement: + return OS << "CompoundStatement_statement"; + } + llvm_unreachable("invalid role"); +} + +syntax::Leaf *syntax::SwitchStatement::switchKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::SwitchStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::CaseStatement::caseKeyword() { + return llvm::cast_or_null( + 
findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::CaseStatement::value() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::CaseStatement_value)); +} + +syntax::Statement *syntax::CaseStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::DefaultStatement::defaultKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::DefaultStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::IfStatement::ifKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::IfStatement::thenStatement() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IfStatement_thenStatement)); +} + +syntax::Leaf *syntax::IfStatement::elseKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IfStatement_elseKeyword)); +} + +syntax::Statement *syntax::IfStatement::elseStatement() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IfStatement_elseStatement)); +} + +syntax::Leaf *syntax::ForStatement::forKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::ForStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::WhileStatement::whileKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::WhileStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Leaf *syntax::ContinueStatement::continueKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::BreakStatement::breakKeyword() { + return llvm::cast_or_null( + 
findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Leaf *syntax::ReturnStatement::returnKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Expression *syntax::ReturnStatement::value() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::ReturnStatement_value)); +} + +syntax::Leaf *syntax::RangeBasedForStatement::forKeyword() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::IntroducerKeyword)); +} + +syntax::Statement *syntax::RangeBasedForStatement::body() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::BodyStatement)); +} + +syntax::Expression *syntax::ExpressionStatement::expression() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::ExpressionStatement_expression)); +} + syntax::Leaf *syntax::CompoundStatement::lbrace() { return llvm::cast_or_null( - findChild(NodeRole::CompoundStatement_lbrace)); + findChild(syntax::NodeRole::OpenParen)); +} + +std::vector syntax::CompoundStatement::statements() { + std::vector Children; + for (auto *C = firstChild(); C; C = C->nextSibling()) { + if (C->role() == syntax::NodeRole::CompoundStatement_statement) + Children.push_back(llvm::cast(C)); + } + return Children; } syntax::Leaf *syntax::CompoundStatement::rbrace() { return llvm::cast_or_null( - findChild(NodeRole::CompoundStatement_rbrace)); + findChild(syntax::NodeRole::CloseParen)); } diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -85,13 +85,10 @@ static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector IndentMask) { - if (N->role() != syntax::NodeRole::Unknown) { - // FIXME: print the symbolic name of a role. 
- if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - else - OS << static_cast(N->role()) << ": "; - } + if (N->role() == syntax::NodeRole::Detached) + OS << "*: "; + // FIXME: find a nice way to print other roles. + if (auto *L = llvm::dyn_cast(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); OS << "\n"; diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -136,19 +136,326 @@ | |-( | |-) | `-CompoundStatement -| |-2: { -| `-3: } +| |-{ +| `-} |-TopLevelDeclaration | |-void | |-foo | |-( | |-) | `-CompoundStatement -| |-2: { -| `-3: } +| |-{ +| `-} `- )txt"}, - }; + // if. + { + R"cpp( +int main() { + if (true) {} + if (true) {} else if (false) {} +} + )cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-int +| |-main +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-IfStatement +| | |-if +| | |-( +| | |-UnknownExpression +| | | `-true +| | |-) +| | `-CompoundStatement +| | |-{ +| | `-} +| |-IfStatement +| | |-if +| | |-( +| | |-UnknownExpression +| | | `-true +| | |-) +| | |-CompoundStatement +| | | |-{ +| | | `-} +| | |-else +| | `-IfStatement +| | |-if +| | |-( +| | |-UnknownExpression +| | | `-false +| | |-) +| | `-CompoundStatement +| | |-{ +| | `-} +| `-} +`- + )txt"}, + // for. + {R"cpp( +void test() { + for (;;) {} +} +)cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-ForStatement +| | |-for +| | |-( +| | |-; +| | |-; +| | |-) +| | `-CompoundStatement +| | |-{ +| | `-} +| `-} +`- + )txt"}, + // declaration statement. 
+ {"void test() { int a = 10; }", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-DeclarationStatement +| | |-int +| | |-a +| | |-= +| | |-10 +| | `-; +| `-} +`- +)txt"}, + {"void test() { ; }", R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-EmptyStatement +| | `-; +| `-} +`- +)txt"}, + // switch, case and default. + {R"cpp( +void test() { + switch (true) { + case 0: + default:; + } +} +)cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-SwitchStatement +| | |-switch +| | |-( +| | |-UnknownExpression +| | | `-true +| | |-) +| | `-CompoundStatement +| | |-{ +| | |-CaseStatement +| | | |-case +| | | |-UnknownExpression +| | | | `-0 +| | | |-: +| | | `-DefaultStatement +| | | |-default +| | | |-: +| | | `-EmptyStatement +| | | `-; +| | `-} +| `-} +`- +)txt"}, + // while. + {R"cpp( +void test() { + while (true) { continue; break; } +} +)cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-WhileStatement +| | |-while +| | |-( +| | |-UnknownExpression +| | | `-true +| | |-) +| | `-CompoundStatement +| | |-{ +| | |-ContinueStatement +| | | |-continue +| | | `-; +| | |-BreakStatement +| | | |-break +| | | `-; +| | `-} +| `-} +`- +)txt"}, + // return. + {R"cpp( +int test() { return 1; } + )cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-int +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-ReturnStatement +| | |-return +| | |-UnknownExpression +| | | `-1 +| | `-; +| `-} +`- + )txt"}, + // Range-based for. 
+ {R"cpp( +void test() { + int a[3]; + for (int x : a) ; +} + )cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-DeclarationStatement +| | |-int +| | |-a +| | |-[ +| | |-3 +| | |-] +| | `-; +| |-RangeBasedForStatement +| | |-for +| | |-( +| | |-int +| | |-x +| | |-: +| | |-UnknownExpression +| | | `-a +| | |-) +| | `-EmptyStatement +| | `-; +| `-} +`- + )txt"}, + // Unhandled statements should end up as 'unknown statement'. + // This example uses a 'label statement', which does not yet have a syntax + // counterpart. + {"void main() { foo: return 100; }", R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-main +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-UnknownStatement +| | |-foo +| | |-: +| | `-ReturnStatement +| | |-return +| | |-UnknownExpression +| | | `-100 +| | `-; +| `-} +`- +)txt"}, + // expressions should be wrapped in 'ExpressionStatement' when they appear + // in a statement position. + {R"cpp( +void test() { + test(); + if (true) test(); else test(); +} + )cpp", + R"txt( +*: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-ExpressionStatement +| | |-UnknownExpression +| | | |-test +| | | |-( +| | | `-) +| | `-; +| |-IfStatement +| | |-if +| | |-( +| | |-UnknownExpression +| | | `-true +| | |-) +| | |-ExpressionStatement +| | | |-UnknownExpression +| | | | |-test +| | | | |-( +| | | | `-) +| | | `-; +| | |-else +| | `-ExpressionStatement +| | |-UnknownExpression +| | | |-test +| | | |-( +| | | `-) +| | `-; +| `-} +`- + )txt"}}; for (const auto &T : Cases) { auto *Root = buildTree(T.first);