diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -11,7 +11,9 @@ #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H #include "clang/AST/Decl.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tree.h" namespace clang { namespace syntax { @@ -19,6 +21,13 @@ /// Build a syntax tree for the main file. syntax::TranslationUnit *buildSyntaxTree(Arena &A, const clang::TranslationUnitDecl &TU); + +// Create syntax trees from subtrees not backed by the source code. + +clang::syntax::Leaf *createPunctuation(clang::syntax::Arena &A, + clang::tok::TokenKind K); +clang::syntax::EmptyStatement *createEmptyStatement(clang::syntax::Arena &A); + } // namespace syntax } // namespace clang #endif diff --git a/clang/include/clang/Tooling/Syntax/Mutations.h b/clang/include/clang/Tooling/Syntax/Mutations.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/Mutations.h @@ -0,0 +1,37 @@ +//===- Mutations.h - mutate syntax trees --------------------*- C++ ---*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines high-level APIs for transforming syntax trees and producing the +// corresponding textual replacements. +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H +#define LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H + +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tree.h" + +namespace clang { +namespace syntax { + +/// Computes textual replacements required to mimic the tree modifications made +/// to the syntax tree. +tooling::Replacements computeReplacements(const Arena &A, + const syntax::TranslationUnit &TU); + +/// Removes a statement or replaces it with an empty statement where one is +/// required syntactically. E.g., in the following example: +/// if (cond) { foo(); } else bar(); +/// One can remove `foo();` completely and to remove `bar();` we would need to +/// replace it with an empty statement. +/// EXPECTS: S->canModify() == true +void removeStatement(syntax::Arena &A, syntax::Statement *S); + +} // namespace syntax +} // namespace clang + +#endif diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -78,6 +78,10 @@ /// Gets the substring that this FileRange refers to. llvm::StringRef text(const SourceManager &SM) const; + /// Convert to the clang range. The returned range is always a char range, + /// never a token range. + CharSourceRange toCharRange(const SourceManager &SM) const; + friend bool operator==(const FileRange &L, const FileRange &R) { return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End); } diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -65,6 +65,9 @@ class Tree; class TreeBuilder; +class FactoryImpl; +class MutationsImpl; + enum class NodeKind : uint16_t; enum class NodeRole : uint8_t; @@ -79,6 +82,23 @@ NodeKind kind() const { return static_cast(Kind); } NodeRole role() const { return static_cast(Role); } + /// Whether the node is detached from a tree, i.e. does not have a parent. + bool isDetached() const; + /// Whether the node was created from the AST backed by the source code + /// rather than added later through mutation APIs or created with factory + /// functions. + /// When this flag is true, all subtrees are also original. + /// This flag is set to false on any modifications to the node or any of its + /// subtrees, even if this simply involves swapping existing subtrees. + bool isOriginal() const { return Original; } + /// If this function return false, the tree cannot be modified because there + /// is no reasonable way to produce the corresponding textual replacements. + /// This can happen when the node crosses macro expansion boundaries. + /// + /// Note that even if the node is not modifiable, its child nodes can be + /// modifiable. + bool canModify() const { return CanModify; } + const Tree *parent() const { return Parent; } Tree *parent() { return Parent; } @@ -93,11 +113,17 @@ private: // Tree is allowed to change the Parent link and Role. friend class Tree; + // TreeBuilder is allowed to set the Original and CanModify flags. + friend class TreeBuilder; + // MutationsImpl sets roles and CanModify flag. + friend class MutationsImpl; Tree *Parent; Node *NextSibling; unsigned Kind : 16; unsigned Role : 8; + unsigned Original : 1; + unsigned CanModify : 1; }; /// A leaf node points to a single token inside the expanded token stream. @@ -121,6 +147,14 @@ Node *firstChild() { return FirstChild; } const Node *firstChild() const { return FirstChild; } + Leaf *firstLeaf(); + const Leaf *firstLeaf() const { + return const_cast(this)->firstLeaf(); + } + + Leaf *lastLeaf(); + const Leaf *lastLeaf() const { return const_cast(this)->lastLeaf(); } + protected: /// Find the first node with a corresponding role. syntax::Node *findChild(NodeRole R); @@ -128,10 +162,18 @@ private: /// Prepend \p Child to the list of children and and sets the parent pointer. /// A very low-level operation that does not check any invariants, only used - /// by TreeBuilder. + /// by TreeBuilder and FactoryImpl. /// EXPECTS: Role != NodeRoleDetached. void prependChildLowLevel(Node *Child, NodeRole Role); friend class TreeBuilder; + friend class FactoryImpl; + + /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of + /// new nodes starting at \p New. + /// Only used by MutationsImpl to implement higher-level mutation operations. + /// (!) \p New can be null to model removal of the child range. + void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New); + friend class MutationsImpl; Node *FirstChild = nullptr; }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -25,6 +25,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -85,7 +86,7 @@ assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Tokens.drop_back(), + Pending.foldChildren(Arena, Tokens.drop_back(), new (Arena.allocator()) syntax::TranslationUnit); return cast(std::move(Pending).finalize()); @@ -156,9 +157,12 @@ assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) - Trees.insert(Trees.end(), - {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); + for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) { + auto *L = new (A.allocator()) syntax::Leaf(&T); + L->Original = true; + L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); + Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + } } ~Forest() { assert(DelayedFolds.empty()); } @@ -176,18 +180,19 @@ } /// Add \p Node to the forest and attach child nodes based on \p Tokens. - void foldChildren(llvm::ArrayRef Tokens, + void foldChildren(const syntax::Arena &A, + llvm::ArrayRef Tokens, syntax::Tree *Node) { // Execute delayed folds inside `Tokens`. auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin()); auto It = BeginExecuted; for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It) - foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End), + foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End), It->second.Node); DelayedFolds.erase(BeginExecuted, It); // Attach children to `Node`. - foldChildrenEager(Tokens, Node); + foldChildrenEager(A, Tokens, Node); } /// Schedule a call to `foldChildren` that will only be executed when @@ -244,7 +249,8 @@ private: /// Implementation detail of `foldChildren`, does acutal folding ignoring /// delayed folds. - void foldChildrenEager(llvm::ArrayRef Tokens, + void foldChildrenEager(const syntax::Arena &A, + llvm::ArrayRef Tokens, syntax::Tree *Node) { assert(Node->firstChild() == nullptr && "node already has children"); @@ -263,6 +269,10 @@ Node->prependChildLowLevel(std::prev(It)->second.Node, std::prev(It)->second.Role); + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue(); + Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, NodeAndRole(Node)}); } @@ -585,7 +595,7 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, syntax::Tree *New) { - Pending.foldChildren(Range, New); + Pending.foldChildren(Arena, Range, New); } void syntax::TreeBuilder::noticeDeclaratorRange( @@ -617,7 +627,8 @@ Pending.assignRole(getExprRange(E), NodeRole::ExpressionStatement_expression); // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. - Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); + Pending.foldChildren(Arena, Range, + new (allocator()) syntax::ExpressionStatement); } Pending.assignRole(Range, Role); } diff --git a/clang/lib/Tooling/Syntax/CMakeLists.txt b/clang/lib/Tooling/Syntax/CMakeLists.txt --- a/clang/lib/Tooling/Syntax/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/CMakeLists.txt @@ -2,7 +2,10 @@ add_clang_library(clangToolingSyntax BuildTree.cpp + ComputeReplacements.cpp Nodes.cpp + Mutations.cpp + Synthesis.cpp Tokens.cpp Tree.cpp diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -0,0 +1,126 @@ +//===- ComputeReplacements.cpp --------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/Support/Error.h" + +using namespace clang; + +namespace { +using ProcessTokensFn = llvm::function_ref, + bool /*IsOriginal*/)>; +/// Enumerates spans of tokens from the tree consecutively laid out in memory. +void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { + struct Enumerator { + Enumerator(ProcessTokensFn Callback) + : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Callback(Callback) {} + + void run(const syntax::Tree *Root) { + process(Root); + // Report the last span to the user. + if (SpanBegin) + Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); + } + + private: + void process(const syntax::Node *N) { + if (auto *T = dyn_cast(N)) { + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) + process(C); + return; + } + + auto *L = cast(N); + if (SpanEnd == L->token() && SpanIsOriginal == L->isOriginal()) { + // Extend the current span. + ++SpanEnd; + return; + } + // Report the current span to the user. + if (SpanBegin) + Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); + // Start recording a new span. + SpanBegin = L->token(); + SpanEnd = SpanBegin + 1; + SpanIsOriginal = L->isOriginal(); + } + + const syntax::Token *SpanBegin; + const syntax::Token *SpanEnd; + bool SpanIsOriginal; + ProcessTokensFn Callback; + }; + + return Enumerator(Callback).run(Root); +} + +syntax::FileRange rangeOfExpanded(const syntax::Arena &A, + llvm::ArrayRef Expanded) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + // Check that \p Expanded actually points into expanded tokens. + assert(Buffer.expandedTokens().begin() <= Expanded.begin()); + assert(Expanded.end() < Buffer.expandedTokens().end()); + + if (Expanded.empty()) + // (!) empty tokens must always point before end(). + return syntax::FileRange( + SM, SM.getExpansionLoc(Expanded.begin()->location()), /*Length=*/0); + + auto Spelled = Buffer.spelledForExpanded(Expanded); + assert(Spelled && "could not find spelled tokens for expanded"); + return syntax::Token::range(SM, Spelled->front(), Spelled->back()); +} +} // namespace + +tooling::Replacements +syntax::computeReplacements(const syntax::Arena &A, + const syntax::TranslationUnit &TU) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + tooling::Replacements Replacements; + // Text inserted by the replacement we are building now. + std::string Replacement; + auto emitReplacement = [&](llvm::ArrayRef ReplacedRange) { + if (ReplacedRange.empty() && Replacement.empty()) + return; + llvm::cantFail(Replacements.add(tooling::Replacement( + SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + Replacement = ""; + }; + + const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); + enumerateTokenSpans( + &TU, [&](llvm::ArrayRef Tokens, bool IsOriginal) { + if (!IsOriginal) { + Replacement += + syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); + return; + } + assert(NextOriginal <= Tokens.begin()); + // We are looking at a span of original tokens. + if (NextOriginal != Tokens.begin()) { + // There is a gap, record a replacement or deletion. + emitReplacement(llvm::makeArrayRef(NextOriginal, Tokens.begin())); + } else { + // No gap, but we may have pending insertions. Emit them now. + emitReplacement(llvm::makeArrayRef(NextOriginal, /*Length=*/0)); + } + NextOriginal = Tokens.end(); + }); + + // We might have pending replacements at the end of file. If so, emit them. + emitReplacement(llvm::makeArrayRef( + NextOriginal, Buffer.expandedTokens().drop_back().end())); + + return Replacements; +} diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -0,0 +1,84 @@ +//===- Mutations.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Token.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include +#include + +using namespace clang; + +// This class has access to the internals of tree nodes. Its sole purpose is to +// define helpers that allow implementing the high-level mutation operations. +class syntax::MutationsImpl { +public: + /// Add a new node with a specified role. + static void addAfter(syntax::Node *Anchor, syntax::Node *New, NodeRole Role) { + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(!New->isDetached()); + assert(Role != NodeRole::Detached); + + New->Role = static_cast(Role); + Anchor->parent()->replaceChildRangeLowLevel(Anchor, Anchor, New); + } + + /// Replace the node, keeping the role. + static void replace(syntax::Node *Old, syntax::Node *New) { + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(New->isDetached()); + + New->Role = Old->Role; + Old->parent()->replaceChildRangeLowLevel(findPrevious(Old), + Old->nextSibling(), New); + } + + /// Completely remove the node from its parent. + static void remove(syntax::Node *N) { + N->parent()->replaceChildRangeLowLevel(findPrevious(N), N->nextSibling(), + /*New=*/nullptr); + } + +private: + static syntax::Node *findPrevious(syntax::Node *N) { + if (N->parent()->firstChild() == N) + return nullptr; + for (syntax::Node *C = N->parent()->firstChild(); C != nullptr; + C = C->nextSibling()) { + if (C->nextSibling() == N) + return C; + } + llvm_unreachable("could not find a child node"); + } +}; + +void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { + assert(S->canModify()); + + if (auto *Parent = llvm::dyn_cast(S->parent())) { + // A child of CompoundStatement can just be safely removed. + MutationsImpl::remove(S); + return; + } + // For the rest, we have to replace with an empty statement. + if (isa(S)) + return; // already an empty statement, nothing to do. + + MutationsImpl::replace(S, createEmptyStatement(A)); +} diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -0,0 +1,38 @@ +//===- Synthesis.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/BuildTree.h" + +using namespace clang; + +/// Exposes private syntax tree APIs required to implement node synthesis. +/// Should not be used for anything else. +class syntax::FactoryImpl { +public: + static void prependChildLowLevel(syntax::Tree *T, syntax::Node *Child, + syntax::NodeRole R) { + T->prependChildLowLevel(Child, R); + } +}; + +clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A, + clang::tok::TokenKind K) { + auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer( + clang::tok::getPunctuatorSpelling(K))) + .second; + assert(Tokens.size() == 1); + assert(Tokens.front().kind() == K); + return new (A.allocator()) clang::syntax::Leaf(Tokens.begin()); +} + +clang::syntax::EmptyStatement * +syntax::createEmptyStatement(clang::syntax::Arena &A) { + auto *S = new (A.allocator()) clang::syntax::EmptyStatement; + FactoryImpl::prependChildLowLevel(S, createPunctuation(A, clang::tok::semi), + NodeRole::Unknown); + return S; +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -67,7 +67,7 @@ auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert(F.endOffset() <= L.beginOffset() && "wrong order of tokens"); + assert(F == L || F.endOffset() <= L.beginOffset() && "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -135,6 +135,12 @@ return {Begin, End}; } +CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { + return CharSourceRange( + SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)), + /*IsTokenRange=*/false); +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -40,7 +40,10 @@ syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast(Kind)), - Role(static_cast(NodeRole::Detached)) {} + Role(static_cast(NodeRole::Detached)), Original(false), + CanModify(false) {} + +bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } @@ -56,6 +59,48 @@ this->FirstChild = Child; } +void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, + Node *New) { + assert(!BeforeBegin || BeforeBegin->Parent == this); + +#ifndef NDEBUG + for (auto *N = New; N; N = N->nextSibling()) { + assert(N->Parent == nullptr); + assert(N->role() != NodeRole::Detached && "Roles must be set"); + // FIXME: sanity-check the role. + } +#endif + + // Detach old nodes. + for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->nextSibling(); + N != End;) { + auto *Next = N->NextSibling; + + N->Role = static_cast(NodeRole::Detached); + N->Parent = nullptr; + N->NextSibling = nullptr; + + N = Next; + } + + // Attach new nodes. + if (BeforeBegin) + BeforeBegin->NextSibling = New ? New : End; + else + FirstChild = New ? New : End; + + if (New) { + auto *Last = New; + while (auto *Next = Last->nextSibling()) + Last = Next; + Last->NextSibling = End; + } + + // Mark the node as modified. + for (auto *T = this; T && T->Original; T = T->Parent) + T->Original = false; +} + namespace { static void traverse(const syntax::Node *N, llvm::function_ref Visit) { @@ -85,9 +130,15 @@ static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector IndentMask) { + std::string Marks; + if (!N->isOriginal()) + Marks += "M"; if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - // FIXME: find a nice way to print other roles. + Marks += "*"; // FIXME: find a nice way to print other roles. + if (!N->canModify()) + Marks += "I"; + if (!Marks.empty()) + OS << Marks << ": "; if (auto *L = llvm::dyn_cast(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); @@ -133,10 +184,35 @@ if (!L) return; ::dumpTokens(OS, *L->token(), A.sourceManager()); + OS << " "; }); return OS.str(); } +syntax::Leaf *syntax::Tree::firstLeaf() { + auto *T = this; + while (auto *C = T->firstChild()) { + if (auto *L = dyn_cast(C)) + return L; + T = cast(C); + } + return nullptr; +} + +syntax::Leaf *syntax::Tree::lastLeaf() { + auto *T = this; + while (auto *C = T->firstChild()) { + // Find the last child. + while (auto *Next = C->nextSibling()) + C = Next; + + if (auto *L = dyn_cast(C)) + return L; + T = cast(C); + } + return nullptr; +} + syntax::Node *syntax::Tree::findChild(NodeRole R) { for (auto *C = FirstChild; C; C = C->nextSibling()) { if (C->role() == R) diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt --- a/clang/unittests/Tooling/Syntax/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt @@ -15,6 +15,7 @@ clangLex clangSerialization clangTooling + clangToolingCore clangToolingSyntax ) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -9,14 +9,24 @@ #include "clang/Tooling/Syntax/Tree.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/LLVM.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Mutations.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -24,6 +34,15 @@ using namespace clang; namespace { +static llvm::ArrayRef tokens(syntax::Node *N) { + assert(N->isOriginal() && "tokens of modified node are not well-defined"); + if (auto *L = dyn_cast(N)) + return llvm::makeArrayRef(L->token(), 1); + auto *T = cast(N); + return llvm::makeArrayRef(T->firstLeaf()->token(), + T->lastLeaf()->token() + 1); +} + class SyntaxTreeTest : public ::testing::Test { protected: // Build a syntax tree for the code. @@ -80,13 +99,13 @@ // Prepare to run a compiler. std::vector Args = {"syntax-test", "-std=c++11", "-fsyntax-only", FileName}; - auto CI = createInvocationFromCommandLine(Args, Diags, FS); - assert(CI); - CI->getFrontendOpts().DisableFree = false; - CI->getPreprocessorOpts().addRemappedFile( + Invocation = createInvocationFromCommandLine(Args, Diags, FS); + assert(Invocation); + Invocation->getFrontendOpts().DisableFree = false; + Invocation->getPreprocessorOpts().addRemappedFile( FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); CompilerInstance Compiler; - Compiler.setInvocation(std::move(CI)); + Compiler.setInvocation(Invocation); Compiler.setDiagnostics(Diags.get()); Compiler.setFileManager(FileMgr.get()); Compiler.setSourceManager(SourceMgr.get()); @@ -108,6 +127,27 @@ } } + /// Finds the deepest node in the tree that covers exactly \p R. + /// FIXME: implement this efficiently and move to public syntax tree API. + syntax::Node *nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { + llvm::ArrayRef Toks = tokens(Root); + + if (Toks.front().location().isFileID() && + Toks.back().location().isFileID() && + syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) == + syntax::FileRange(SourceMgr->getMainFileID(), R.Begin, R.End)) + return Root; + + auto *T = dyn_cast(Root); + if (!T) + return nullptr; + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) { + if (auto *Result = nodeByRange(R, C)) + return Result; + } + return nullptr; + } + // Data fields. llvm::IntrusiveRefCntPtr Diags = new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); @@ -117,6 +157,7 @@ new FileManager(FileSystemOptions(), FS); llvm::IntrusiveRefCntPtr SourceMgr = new SourceManager(*Diags, *FileMgr); + std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr Arena; }; @@ -695,6 +736,39 @@ | `-; `-} )txt"}, + // Some nodes are non-modifiable, they are marked with 'I:'. + {R"cpp( +#define HALF_IF if (1+ +#define HALF_IF_2 1) {} +void test() { + HALF_IF HALF_IF_2 else {} +})cpp", + R"txt( +*: TranslationUnit +`-SimpleDeclaration + |-void + |-test + |-( + |-) + `-CompoundStatement + |-{ + |-IfStatement + | |-I: if + | |-I: ( + | |-I: UnknownExpression + | | |-I: 1 + | | |-I: + + | | `-I: 1 + | |-I: ) + | |-I: CompoundStatement + | | |-I: { + | | `-I: } + | |-else + | `-CompoundStatement + | |-{ + | `-} + `-} + )txt"}, }; for (const auto &T : Cases) { @@ -706,4 +780,45 @@ EXPECT_EQ(Expected, Actual) << "the resulting dump is:\n" << Actual; } } + +TEST_F(SyntaxTreeTest, Mutations) { + using Transformation = std::function; + auto CheckTransformation = [this](std::string Input, std::string Expected, + Transformation Transform) -> void { + llvm::Annotations Source(Input); + auto *Root = buildTree(Source.code()); + + Transform(Source, Root); + + auto Replacements = syntax::computeReplacements(*Arena, *Root); + auto Output = tooling::applyAllReplacements(Source.code(), Replacements); + if (!Output) { + ADD_FAILURE() << "could not apply replacements: " + << llvm::toString(Output.takeError()); + return; + } + + EXPECT_EQ(Expected, *Output) << "input is:\n" << Input; + }; + + // Removes the selected statement. Input should have exactly one selected + // range and it should correspond to a single statement. + auto RemoveStatement = [this](const llvm::Annotations &Input, + syntax::TranslationUnit *TU) { + auto *S = cast(nodeByRange(Input.range(), TU)); + ASSERT_TRUE(S->canModify()) << "cannot remove a statement"; + syntax::removeStatement(*Arena, S); + }; + + std::vector> + Cases = { + {"void test() { [[100+100;]] test(); }", "void test() { test(); }"}, + {"void test() { if (true) [[{}]] else {} }", + "void test() { if (true) ; else {} }"}, + {"void test() { [[;]] }", "void test() { }"}}; + for (const auto &C : Cases) + CheckTransformation(C.first, C.second, RemoveStatement); +} + } // namespace