diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -11,7 +11,9 @@ #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H #include "clang/AST/Decl.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tree.h" namespace clang { namespace syntax { @@ -19,6 +21,13 @@ /// Build a syntax tree for the main file. syntax::TranslationUnit *buildSyntaxTree(Arena &A, const clang::TranslationUnitDecl &TU); +/// Allows to create syntax trees from subtrees not backed by the source code. +class Factory { +public: + static syntax::Leaf *createPunctuation(syntax::Arena &A, tok::TokenKind K); + static syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A); +}; + } // namespace syntax } // namespace clang #endif diff --git a/clang/include/clang/Tooling/Syntax/Mutations.h b/clang/include/clang/Tooling/Syntax/Mutations.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/Mutations.h @@ -0,0 +1,33 @@ +//===- Mutations.h - mutatate syntax trees --------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// Defines high-level APIs for transforming syntax trees and producing the +// corresponding textual replacements. +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_MUTATIONS_H +#define LLVM_CLANG_TOOLING_SYNTAX_TREE_MUTATIONS_H + +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tree.h" + +namespace clang { +namespace syntax { + +/// Computes textual replacements required to mimic the tree modifications made +/// to the syntax tree. +tooling::Replacements computeReplacements(const Arena &A, + const syntax::TranslationUnit *TU); + +/// Either removes a statement or replaces it with an empty statement. +/// EXPECTS: S->canModify() == true +void removeStatement(syntax::Arena &A, syntax::Statement *S); + +} // namespace syntax +} // namespace clang + +#endif \ No newline at end of file diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -78,6 +78,10 @@ /// Gets the substring that this FileRange refers to. llvm::StringRef text(const SourceManager &SM) const; + /// Convert to the clang range. The returned range is always a char range, + /// never a token range. + CharSourceRange toCharRange(const SourceManager &SM) const; + friend bool operator==(const FileRange &L, const FileRange &R) { return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End); } diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -65,6 +65,9 @@ class Tree; class TreeBuilder; +class Factory; +class MutationsImpl; + enum class NodeKind : uint16_t; enum class NodeRole : uint8_t; @@ -79,6 +82,20 @@ NodeKind kind() const { return static_cast(Kind); } NodeRole role() const { return static_cast(Role); } + /// Whether the node is detached from a tree, i.e. does not have a parent. + bool isDetached() const; + /// Whether the node was created from the AST backed by the source code + /// rather than added later through mutation APIs or created with factory + /// functions. + bool isOriginal() const { return Original; } + /// If this function return false, the tree cannot be modified because there + /// is no reasonable way to produce the corresponding textual replacements. + /// This can happen when the node crosses macro expansion boundaries. + /// + /// Note that even if the node is not modifiable, its child nodes can be + /// modifiable. + bool canModify() const { return CanModify; } + const Tree *parent() const { return Parent; } Tree *parent() { return Parent; } @@ -93,11 +110,17 @@ private: // Tree is allowed to change the Parent link and Role. friend class Tree; + // TreeBuilder is allowed to set the Original and CanModify flags. + friend class TreeBuilder; + // MutationsImpl sets roles and CanModify flag. + friend class MutationsImpl; Tree *Parent; Node *NextSibling; unsigned Kind : 16; unsigned Role : 8; + unsigned Original : 1; + unsigned CanModify : 1; }; /// A leaf node points to a single token inside the expanded token stream. @@ -128,10 +151,17 @@ private: /// Prepend \p Child to the list of children and and sets the parent pointer. /// A very low-level operation that does not check any invariants, only used - /// by TreeBuilder. + /// by TreeBuilder and syntax::Factory. /// EXPECTS: Role != NodeRoleDetached. void prependChildLowLevel(Node *Child, NodeRole Role); friend class TreeBuilder; + friend class Factory; + + /// Replace a range of children [Before->NextSibling, End) with a list of + /// new nodes starting at \p New. + /// Only used by MutationsImpl to implement higher-level mutation operations. + void replaceChildRangeLowLevel(Node *Before, Node *End, Node *New); + friend class MutationsImpl; Node *FirstChild = nullptr; }; diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -22,6 +22,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" #include @@ -70,7 +71,7 @@ syntax::TranslationUnit *finalize() && { auto Tokens = Arena.tokenBuffer().expandedTokens(); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Tokens, + Pending.foldChildren(Arena, Tokens, new (Arena.allocator()) syntax::TranslationUnit); return cast(std::move(Pending).finalize()); @@ -125,9 +126,12 @@ // FIXME: do not add 'eof' to the tree. // Create all leaf nodes. - for (auto &T : A.tokenBuffer().expandedTokens()) - Trees.insert(Trees.end(), - {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}}); + for (auto &T : A.tokenBuffer().expandedTokens()) { + auto *L = new (A.allocator()) syntax::Leaf(&T); + L->Original = true; + L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue(); + Trees.insert(Trees.end(), {&T, NodeAndRole{L}}); + } } void assignRole(llvm::ArrayRef Range, @@ -144,7 +148,8 @@ /// Add \p Node to the forest and fill its children nodes based on the \p /// NodeRange. - void foldChildren(llvm::ArrayRef NodeTokens, + void foldChildren(const syntax::Arena &A, + llvm::ArrayRef NodeTokens, syntax::Tree *Node) { assert(!NodeTokens.empty()); assert(Node->firstChild() == nullptr && "node already has children"); @@ -164,6 +169,11 @@ Node->prependChildLowLevel(std::prev(It)->second.Node, std::prev(It)->second.Role); + // Mark that this node came from the AST and is backed by the source code. + Node->Original = true; + Node->CanModify = + A.tokenBuffer().spelledForExpanded(NodeTokens).hasValue(); + Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, NodeAndRole(Node)}); } @@ -422,7 +432,7 @@ void syntax::TreeBuilder::foldNode(llvm::ArrayRef Range, syntax::Tree *New) { - Pending.foldChildren(Range, New); + Pending.foldChildren(Arena, Range, New); } void syntax::TreeBuilder::markChildToken(SourceLocation Loc, @@ -442,7 +452,8 @@ if (auto *E = dyn_cast(Child)) { Pending.assignRole(getRange(E), NodeRole::ExpressionStatement_expression); // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon. - Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement); + Pending.foldChildren(Arena, Range, + new (allocator()) syntax::ExpressionStatement); } Pending.assignRole(Range, Role); } @@ -468,3 +479,20 @@ BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext()); return std::move(Builder).finalize(); } + +syntax::Leaf *syntax::Factory::createPunctuation(syntax::Arena &A, + tok::TokenKind K) { + auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer( + clang::tok::getPunctuatorSpelling(K))) + .second; + assert(Tokens.size() == 1); + assert(Tokens.front().kind() == K); + return new (A.allocator()) syntax::Leaf(Tokens.begin()); +} + +syntax::EmptyStatement * +syntax::Factory::createEmptyStatement(syntax::Arena &A) { + auto *S = new (A.allocator()) syntax::EmptyStatement; + S->prependChildLowLevel(createPunctuation(A, tok::semi), NodeRole::Unknown); + return S; +} \ No newline at end of file diff --git a/clang/lib/Tooling/Syntax/CMakeLists.txt b/clang/lib/Tooling/Syntax/CMakeLists.txt --- a/clang/lib/Tooling/Syntax/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/CMakeLists.txt @@ -2,7 +2,9 @@ add_clang_library(clangToolingSyntax BuildTree.cpp + ComputeReplacements.cpp Nodes.cpp + Mutations.cpp Tokens.cpp Tree.cpp diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -0,0 +1,124 @@ +//===- ComputeReplacements.cpp --------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "llvm/Support/Error.h" + +using namespace clang; + +namespace { +using ProcessTokensFn = llvm::function_ref, + bool /*IsOriginal*/)>; +/// Enumerates spans of tokens from the tree consecutively laid out in memory. +struct EnumerateTokenSpans { + EnumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) + : BeginSpan(nullptr), EndSpan(nullptr), SpanIsOriginal(false), + Callback(Callback) { + process(Root); + // Report the last span to the user. + if (BeginSpan) + Callback(llvm::makeArrayRef(BeginSpan, EndSpan), SpanIsOriginal); + } + +private: + void process(const syntax::Node *N) { + if (auto *T = dyn_cast(N)) { + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) + process(C); + return; + } + + auto *L = cast(N); + if (EndSpan == L->token() && SpanIsOriginal == L->isOriginal()) { + // Extend the current span. + ++EndSpan; + return; + } + // Report the current span to the user. + if (BeginSpan) + Callback(llvm::makeArrayRef(BeginSpan, EndSpan), SpanIsOriginal); + // Start recording a new span. + BeginSpan = L->token(); + EndSpan = BeginSpan + 1; + SpanIsOriginal = L->isOriginal(); + } + + const syntax::Token *BeginSpan; + const syntax::Token *EndSpan; + bool SpanIsOriginal; + ProcessTokensFn Callback; +}; + +syntax::FileRange rangeOfExpanded(const syntax::Arena &A, + llvm::ArrayRef Expanded) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + assert(Buffer.expandedTokens().begin() <= Expanded.begin()); + assert(Expanded.end() < Buffer.expandedTokens().end()); + + if (Expanded.empty()) + // (!) empty tokens must always point before end(). + return syntax::FileRange( + SM, SM.getExpansionLoc(Expanded.begin()->location()), /*Length=*/0); + + auto Spelled = Buffer.spelledForExpanded(Expanded); + assert(Spelled && "could not find spelled tokens for expanded"); + return syntax::Token::range(SM, Spelled->front(), Spelled->back()); +} +} // namespace + +tooling::Replacements +syntax::computeReplacements(const syntax::Arena &A, + const syntax::TranslationUnit *TU) { + auto &Buffer = A.tokenBuffer(); + auto &SM = A.sourceManager(); + + tooling::Replacements Replacements; + // Text inserted by the replacement we are building now. + std::string Replacement; + auto emitReplacement = [&](llvm::ArrayRef RemovedRange) { + if (RemovedRange.empty() && Replacement.empty()) + return; + + // Removed range points into expanded tokens. + assert(Buffer.expandedTokens().begin() <= RemovedRange.begin()); + assert(RemovedRange.begin() < Buffer.expandedTokens().end()); + + llvm::cantFail(Replacements.add(tooling::Replacement( + SM, rangeOfExpanded(A, RemovedRange).toCharRange(SM), Replacement))); + Replacement = ""; + }; + + const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); + EnumerateTokenSpans( + TU, [&](llvm::ArrayRef Tokens, bool IsOriginal) { + if (!IsOriginal) { + Replacement += + syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); + return; + } + assert(NextOriginal <= Tokens.begin()); + // We are looking at a span of original tokens. + if (NextOriginal != Tokens.begin()) { + // There is a gap, record a replacement or deletion. + emitReplacement(llvm::makeArrayRef(NextOriginal, Tokens.begin())); + } else { + // No gap, but we may have pending insertions. Emit them now. + emitReplacement(llvm::makeArrayRef(NextOriginal, /*Length=*/0)); + } + NextOriginal = Tokens.end(); + }); + + // We might have pending replacements at the end of file. If so, emit them. + emitReplacement( + llvm::makeArrayRef(NextOriginal, Buffer.expandedTokens().end())); + + return Replacements; +} \ No newline at end of file diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -0,0 +1,84 @@ +//===- Mutations.cpp ------------------------------------------*- C++ -*-=====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Basic/LLVM.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Token.h" +#include "clang/Tooling/Core/Replacement.h" +#include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/Casting.h" +#include +#include + +using namespace clang; + +// This class has access to the internals of tree nodes. Its sole purpose is to +// define helpers that allow implementing the high-level mutation operations. +class syntax::MutationsImpl { +public: + /// Add a new node with a specified role. + static void addAfter(syntax::Node *Anchor, syntax::Node *New, NodeRole Role) { + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(!New->isDetached()); + assert(Role != NodeRole::Detached); + + New->Role = static_cast(Role); + Anchor->parent()->replaceChildRangeLowLevel(Anchor, Anchor, New); + } + + /// Replace the node, keeping the role. + static void replace(syntax::Node *Old, syntax::Node *New) { + assert(New->Parent == nullptr); + assert(New->NextSibling == nullptr); + assert(New->isDetached()); + + New->Role = Old->Role; + Old->parent()->replaceChildRangeLowLevel(findPrevious(Old), + Old->nextSibling(), New); + } + + /// Completely remove the node from its parent. + static void remove(syntax::Node *N) { + N->parent()->replaceChildRangeLowLevel(findPrevious(N), N->nextSibling(), + /*New=*/nullptr); + } + +private: + static syntax::Node *findPrevious(syntax::Node *N) { + if (N->parent()->firstChild() == N) + return nullptr; + for (syntax::Node *C = N->parent()->firstChild(); C != nullptr; + C = C->nextSibling()) { + if (C->nextSibling() == N) + return C; + } + llvm_unreachable("could not find a child node"); + } +}; + +void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { + assert(S->canModify()); + + if (auto *Parent = llvm::dyn_cast(S->parent())) { + // A child of CompoundStatement can just be safely removed. + MutationsImpl::remove(S); + return; + } + // For the rest, we have to replace with an empty statement. + if (isa(S)) + return; // already an empty statement, nothing to do. + + MutationsImpl::replace(S, syntax::Factory::createEmptyStatement(A)); +} diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -67,7 +67,7 @@ auto F = First.range(SM); auto L = Last.range(SM); assert(F.file() == L.file() && "tokens from different files"); - assert(F.endOffset() <= L.beginOffset() && "wrong order of tokens"); + assert(F == L || F.endOffset() <= L.beginOffset() && "wrong order of tokens"); return FileRange(F.file(), F.beginOffset(), L.endOffset()); } @@ -119,6 +119,12 @@ return Text.substr(Begin, length()); } +CharSourceRange FileRange::toCharRange(const SourceManager &SM) const { + return CharSourceRange( + SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)), + /*IsTokenRange=*/false); +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); @@ -172,6 +178,10 @@ if (Expanded.empty()) return llvm::None; + // We do not store 'eof' in the spelled tokens. + if (Expanded.back().kind() == tok::eof) + return llvm::None; + // FIXME: also allow changes uniquely mapping to macro arguments. const syntax::Token *BeginSpelled; diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -40,7 +40,10 @@ syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), Kind(static_cast(Kind)), - Role(static_cast(NodeRole::Detached)) {} + Role(static_cast(NodeRole::Detached)), Original(false), + CanModify(false) {} + +bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; } bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; } @@ -56,6 +59,47 @@ this->FirstChild = Child; } +void syntax::Tree::replaceChildRangeLowLevel(Node *Before, Node *End, + Node *New) { + assert(!Before || Before->Parent == this); + +#ifndef NDEBUG + for (auto *N = New; N; N = N->nextSibling()) { + assert(N->Parent == nullptr); + assert(N->role() != NodeRole::Detached && "Roles must be set"); + // FIXME: sanity-check the role. + } +#endif + + // Detach old nodes. + for (auto *N = !Before ? FirstChild : Before->nextSibling(); N != End;) { + auto *Next = N->NextSibling; + + N->Role = static_cast(NodeRole::Detached); + N->Parent = nullptr; + N->NextSibling = nullptr; + + N = Next; + } + + // Attach new nodes. + if (Before) + Before->NextSibling = New ? New : End; + else + FirstChild = New ? New : End; + + if (New) { + auto *Last = New; + while (auto *Next = Last->nextSibling()) + Last = Next; + Last->NextSibling = End; + } + + // Mark the node as modified. + for (auto *T = this; T && T->Original; T = T->Parent) + Original = false; +} + namespace { static void traverse(const syntax::Node *N, llvm::function_ref Visit) { @@ -85,9 +129,15 @@ static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N, const syntax::Arena &A, std::vector IndentMask) { + std::string Marks; + if (!N->isOriginal()) + Marks += "M"; if (N->role() == syntax::NodeRole::Detached) - OS << "*: "; - // FIXME: find a nice way to print other roles. + Marks += "*"; // FIXME: find a nice way to print other roles. + if (!N->canModify()) + Marks += "X"; + if (!Marks.empty()) + OS << Marks << ": "; if (auto *L = llvm::dyn_cast(N)) { dumpTokens(OS, *L->token(), A.sourceManager()); @@ -133,6 +183,7 @@ if (!L) return; ::dumpTokens(OS, *L->token(), A.sourceManager()); + OS << " "; }); return OS.str(); } diff --git a/clang/unittests/Tooling/Syntax/CMakeLists.txt b/clang/unittests/Tooling/Syntax/CMakeLists.txt --- a/clang/unittests/Tooling/Syntax/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/CMakeLists.txt @@ -15,6 +15,7 @@ clangLex clangSerialization clangTooling + clangToolingCore clangToolingSyntax ) diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -9,14 +9,24 @@ #include "clang/Tooling/Syntax/Tree.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/Decl.h" +#include "clang/AST/Stmt.h" +#include "clang/Basic/LLVM.h" #include "clang/Frontend/CompilerInstance.h" +#include "clang/Frontend/CompilerInvocation.h" #include "clang/Frontend/FrontendAction.h" #include "clang/Lex/PreprocessorOptions.h" +#include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/Mutations.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Tooling.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Support/Annotations.h" #include "gmock/gmock.h" #include "gtest/gtest.h" #include @@ -24,6 +34,38 @@ using namespace clang; namespace { +static syntax::Leaf *firstLeaf(syntax::Tree *T) { + while (auto *C = T->firstChild()) { + if (auto *L = dyn_cast(C)) + return L; + T = cast(C); + } + return nullptr; +} + +static syntax::Leaf *lastLeaf(syntax::Tree *T) { + while (auto *C = T->firstChild()) { + // Find the last child. + while (auto *Next = C->nextSibling()) + C = Next; + + if (auto *L = dyn_cast(C)) + return L; + T = cast(C); + } + return nullptr; +} + +static llvm::ArrayRef tokens(syntax::Node *N) { + if (auto *L = dyn_cast(N)) + return llvm::makeArrayRef(L->token(), 1); + + auto *T = dyn_cast(N); + syntax::Leaf *First = firstLeaf(T); + syntax::Leaf *Last = lastLeaf(T); + return llvm::makeArrayRef(First->token(), Last->token() + 1); +} + class SyntaxTreeTest : public ::testing::Test { protected: // Build a syntax tree for the code. @@ -80,13 +122,13 @@ // Prepare to run a compiler. std::vector Args = {"syntax-test", "-std=c++11", "-fsyntax-only", FileName}; - auto CI = createInvocationFromCommandLine(Args, Diags, FS); - assert(CI); - CI->getFrontendOpts().DisableFree = false; - CI->getPreprocessorOpts().addRemappedFile( + Invocation = createInvocationFromCommandLine(Args, Diags, FS); + assert(Invocation); + Invocation->getFrontendOpts().DisableFree = false; + Invocation->getPreprocessorOpts().addRemappedFile( FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release()); CompilerInstance Compiler; - Compiler.setInvocation(std::move(CI)); + Compiler.setInvocation(Invocation); Compiler.setDiagnostics(Diags.get()); Compiler.setFileManager(FileMgr.get()); Compiler.setSourceManager(SourceMgr.get()); @@ -108,6 +150,27 @@ } } + /// Finds the deepest nodes in the tree that covers exactly \p R. + /// FIXME: implement this efficiently and move to public syntax tree API. + syntax::Node *nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { + llvm::ArrayRef Toks = tokens(Root); + + if (Toks.front().location().isFileID() && + Toks.back().location().isFileID() && + syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) == + syntax::FileRange(SourceMgr->getMainFileID(), R.Begin, R.End)) + return Root; + + auto *T = dyn_cast(Root); + if (!T) + return nullptr; + for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) { + if (auto *Result = nodeByRange(R, C)) + return Result; + } + return nullptr; + } + // Data fields. llvm::IntrusiveRefCntPtr Diags = new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions); @@ -117,6 +180,7 @@ new FileManager(FileSystemOptions(), FS); llvm::IntrusiveRefCntPtr SourceMgr = new SourceManager(*Diags, *FileMgr); + std::shared_ptr Invocation; // Set after calling buildTree(). std::unique_ptr Arena; }; @@ -455,6 +519,40 @@ | | `-; | `-} `- + )txt"}, + // Some nodes are non-modifiable, they are marked with 'X:'. + {R"cpp( +#define HALF_IF if (1+ +#define HALF_IF_2 1) {} +void test() { + HALF_IF HALF_IF_2 else {} +})cpp", + R"txt( +*X: TranslationUnit +|-TopLevelDeclaration +| |-void +| |-test +| |-( +| |-) +| `-CompoundStatement +| |-{ +| |-IfStatement +| | |-X: if +| | |-X: ( +| | |-X: UnknownExpression +| | | |-X: 1 +| | | |-X: + +| | | `-X: 1 +| | |-X: ) +| | |-X: CompoundStatement +| | | |-X: { +| | | `-X: } +| | |-else +| | `-CompoundStatement +| | |-{ +| | `-} +| `-} +`-X: )txt"}}; for (const auto &T : Cases) { @@ -464,4 +562,45 @@ EXPECT_EQ(Expected, Actual) << "the resulting dump is:\n" << Actual; } } + +TEST_F(SyntaxTreeTest, Mutations) { + using Transformation = std::function; + auto CheckTransformation = [this](std::string Input, std::string Expected, + Transformation Transform) -> void { + llvm::Annotations Source(Input); + auto *Root = buildTree(Source.code()); + + Transform(Source, Root); + + auto Replacements = syntax::computeReplacements(*Arena, Root); + auto Output = tooling::applyAllReplacements(Source.code(), Replacements); + if (!Output) { + ADD_FAILURE() << "could not apply replacements: {0}" + << llvm::toString(Output.takeError()); + return; + } + + EXPECT_EQ(Expected, *Output) << "input is:\n" << Input; + }; + + // Removes a selected statements. Input should have exactly one selected range + // and it should correspond to a statement. + auto RemoveStatement = [this](const llvm::Annotations &Input, + syntax::TranslationUnit *TU) { + auto *S = cast(nodeByRange(Input.range(), TU)); + ASSERT_TRUE(S->canModify()) << "cannot remove a statement"; + syntax::removeStatement(*Arena, S); + }; + + std::vector> + Cases = { + {"void test() { [[100+100;]] test(); }", "void test() { test(); }"}, + {"void test() { if (true) [[{}]] else {} }", + "void test() { if (true) ; else {} }"}, + {"void test() { [[;]] }", "void test() { }"}}; + for (const auto &C : Cases) + CheckTransformation(C.first, C.second, RemoveStatement); +} + } // namespace