diff --git a/clang-tools-extra/clangd/SemanticSelection.cpp b/clang-tools-extra/clangd/SemanticSelection.cpp --- a/clang-tools-extra/clangd/SemanticSelection.cpp +++ b/clang-tools-extra/clangd/SemanticSelection.cpp @@ -16,6 +16,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Casting.h" @@ -52,8 +53,9 @@ return Range; } -llvm::Optional extractFoldingRange(const syntax::Node *Node, - const SourceManager &SM) { +llvm::Optional +extractFoldingRange(const syntax::Node *Node, + const syntax::TokenBufferTokenManager &TM) { if (const auto *Stmt = dyn_cast(Node)) { const auto *LBrace = cast_or_null( Stmt->findChild(syntax::NodeRole::OpenParen)); @@ -65,9 +67,12 @@ if (!LBrace || !RBrace) return llvm::None; // Fold the entire range within braces, including whitespace. - const SourceLocation LBraceLocInfo = LBrace->getToken()->endLocation(), - RBraceLocInfo = RBrace->getToken()->location(); - auto Range = toFoldingRange(SourceRange(LBraceLocInfo, RBraceLocInfo), SM); + const SourceLocation LBraceLocInfo = + TM.getToken(LBrace->getTokenKey())->endLocation(), + RBraceLocInfo = + TM.getToken(RBrace->getTokenKey())->location(); + auto Range = toFoldingRange(SourceRange(LBraceLocInfo, RBraceLocInfo), + TM.sourceManager()); // Do not generate folding range for compound statements without any // nodes and newlines. if (Range && Range->startLine != Range->endLine) @@ -77,15 +82,16 @@ } // Traverse the tree and collect folding ranges along the way. 
-std::vector collectFoldingRanges(const syntax::Node *Root, - const SourceManager &SM) { +std::vector +collectFoldingRanges(const syntax::Node *Root, + const syntax::TokenBufferTokenManager &TM) { std::queue Nodes; Nodes.push(Root); std::vector Result; while (!Nodes.empty()) { const syntax::Node *Node = Nodes.front(); Nodes.pop(); - const auto Range = extractFoldingRange(Node, SM); + const auto Range = extractFoldingRange(Node, TM); if (Range) Result.push_back(*Range); if (const auto *T = dyn_cast(Node)) @@ -157,9 +163,11 @@ // control flow statement bodies). // Related issue: https://github.com/clangd/clangd/issues/310 llvm::Expected> getFoldingRanges(ParsedAST &AST) { - syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), AST.getTokens()); - const auto *SyntaxTree = syntax::buildSyntaxTree(A, AST.getASTContext()); - return collectFoldingRanges(SyntaxTree, AST.getSourceManager()); + syntax::Arena A; + syntax::TokenBufferTokenManager TM(AST.getTokens(), AST.getLangOpts(), + AST.getSourceManager()); + const auto *SyntaxTree = syntax::buildSyntaxTree(A, TM, AST.getASTContext()); + return collectFoldingRanges(SyntaxTree, TM); } } // namespace clangd diff --git a/clang/include/clang/Tooling/Syntax/BuildTree.h b/clang/include/clang/Tooling/Syntax/BuildTree.h --- a/clang/include/clang/Tooling/Syntax/BuildTree.h +++ b/clang/include/clang/Tooling/Syntax/BuildTree.h @@ -13,6 +13,7 @@ #include "clang/AST/Decl.h" #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tree.h" namespace clang { @@ -21,19 +22,21 @@ /// Build a syntax tree for the main file. /// This usually covers the whole TranslationUnitDecl, but can be restricted by /// the ASTContext's traversal scope. 
-syntax::TranslationUnit *buildSyntaxTree(Arena &A, ASTContext &Context); +syntax::TranslationUnit * +buildSyntaxTree(Arena &A, TokenBufferTokenManager &TBTM, ASTContext &Context); // Create syntax trees from subtrees not backed by the source code. // Synthesis of Leafs /// Create `Leaf` from token with `Spelling` and assert it has the desired /// `TokenKind`. -syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K, - StringRef Spelling); +syntax::Leaf *createLeaf(syntax::Arena &A, TokenBufferTokenManager &TBTM, + tok::TokenKind K, StringRef Spelling); /// Infer the token spelling from its `TokenKind`, then create `Leaf` from /// this token -syntax::Leaf *createLeaf(syntax::Arena &A, tok::TokenKind K); +syntax::Leaf *createLeaf(syntax::Arena &A, TokenBufferTokenManager &TBTM, + tok::TokenKind K); // Synthesis of Trees /// Creates the concrete syntax node according to the specified `NodeKind` `K`. @@ -44,7 +47,8 @@ syntax::NodeKind K); // Synthesis of Syntax Nodes -syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A); +syntax::EmptyStatement *createEmptyStatement(syntax::Arena &A, + TokenBufferTokenManager &TBTM); /// Creates a completely independent copy of `N` with its macros expanded. /// @@ -52,7 +56,9 @@ /// * Detached, i.e. `Parent == NextSibling == nullptr` and /// `Role == Detached`. /// * Synthesized, i.e. `Original == false`. 
-syntax::Node *deepCopyExpandingMacros(syntax::Arena &A, const syntax::Node *N); +syntax::Node *deepCopyExpandingMacros(syntax::Arena &A, + TokenBufferTokenManager &TBTM, + const syntax::Node *N); } // namespace syntax } // namespace clang #endif diff --git a/clang/include/clang/Tooling/Syntax/Mutations.h b/clang/include/clang/Tooling/Syntax/Mutations.h --- a/clang/include/clang/Tooling/Syntax/Mutations.h +++ b/clang/include/clang/Tooling/Syntax/Mutations.h @@ -13,6 +13,7 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tree.h" namespace clang { @@ -20,7 +21,7 @@ /// Computes textual replacements required to mimic the tree modifications made /// to the syntax tree. -tooling::Replacements computeReplacements(const Arena &A, +tooling::Replacements computeReplacements(const TokenBufferTokenManager &TBTM, const syntax::TranslationUnit &TU); /// Removes a statement or replaces it with an empty statement where one is @@ -29,7 +30,8 @@ /// One can remove `foo();` completely and to remove `bar();` we would need to /// replace it with an empty statement. 
/// EXPECTS: S->canModify() == true -void removeStatement(syntax::Arena &A, syntax::Statement *S); +void removeStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM, + syntax::Statement *S); } // namespace syntax } // namespace clang diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -21,9 +21,7 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H -#include "clang/Basic/TokenKinds.h" -#include "clang/Lex/Token.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Basic/LLVM.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" diff --git a/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h b/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h @@ -0,0 +1,70 @@ +//===- TokenBufferTokenManager.h -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H + +#include "clang/Tooling/Syntax/TokenManager.h" +#include "clang/Tooling/Syntax/Tokens.h" + +namespace clang { +namespace syntax { + +/// A TokenBuffer-powered token manager. +/// It tracks the underlying token buffers, source manager, etc. 
+class TokenBufferTokenManager : public TokenManager { +public: + TokenBufferTokenManager(const TokenBuffer &Tokens, + const LangOptions &LangOpts, SourceManager &SourceMgr) + : Tokens(Tokens), LangOpts(LangOpts), SM(SourceMgr) {} + + static bool classof(const TokenManager *N) { return N->kind() == Kind; } + llvm::StringLiteral kind() const override { return Kind; } + + llvm::StringRef getText(Key I) const override { + const auto *Token = getToken(I); + assert(Token); + // Handle 'eof' separately, calling text() on it produces an empty string. + // FIXME: this special logic is for syntax::Leaf dump, move it when we + // have a direct way to retrieve token kind in the syntax::Leaf. + if (Token->kind() == tok::eof) + return ""; + return Token->text(SM); + } + + const syntax::Token *getToken(Key I) const { + return reinterpret_cast(I); + } + SourceManager &sourceManager() { return SM; } + const SourceManager &sourceManager() const { return SM; } + const TokenBuffer &tokenBuffer() const { return Tokens; } + +private: + // This manager is powered by the TokenBuffer. + static constexpr llvm::StringLiteral Kind = "TokenBuffer"; + + /// Add \p Buffer to the underlying source manager, tokenize it and store the + /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens + /// that were not written in user code. + std::pair> + lexBuffer(std::unique_ptr Buffer); + friend class FactoryImpl; + + const TokenBuffer &Tokens; + const LangOptions &LangOpts; + + /// The underlying source manager for the ExtraTokens. + SourceManager &SM; + /// IDs and storage for additional tokenized files. 
+ llvm::DenseMap> ExtraTokens; +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/TokenManager.h b/clang/include/clang/Tooling/Syntax/TokenManager.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenManager.h @@ -0,0 +1,45 @@ +//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines Token interfaces for the clang syntax-tree. This is the level of +// abstraction that the syntax-tree uses to operate on Token. +// +// TokenManager decouples the syntax-tree from a particular token +// implementation. For example, a TokenBuffer captured from a clang parser may +// track macro expansions and associate tokens with clang's SourceManager, while +// a clang pseudoparser would use a flat array of raw-lexed tokens in memory. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H + +#include "llvm/ADT/StringRef.h" +#include + +namespace clang { +namespace syntax { + +/// Defines interfaces for operating on "Token" in the clang syntax-tree. +class TokenManager { +public: + /// Describes what the exact class kind of the TokenManager is. + virtual llvm::StringLiteral kind() const = 0; + + /// A key to identify a specific token. The token concept depends on the + /// underlying implementation -- it can be a spelled token from the original + /// source file or an expanded token. + /// The syntax-tree Leaf node holds a Key. 
+ using Key = uintptr_t; + virtual llvm::StringRef getText(Key K) const = 0; +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -33,6 +33,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // Defines the basic structure of the syntax tree. There are two kinds of nodes: -// - leaf nodes correspond to a token in the expanded token stream, +// - leaf nodes correspond to tokens, // - tree nodes correspond to language grammar constructs. // // The tree is initially built from an AST. Each node of a newly built tree @@ -21,11 +21,8 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/iterator.h" @@ -36,33 +33,12 @@ namespace clang { namespace syntax { -/// A memory arena for syntax trees. Also tracks the underlying token buffers, -/// source manager, etc. +/// A memory arena for syntax trees. +// FIXME: use BumpPtrAllocator directly. 
class Arena { public: - Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens); - - const SourceManager &getSourceManager() const { return SourceMgr; } - const LangOptions &getLangOptions() const { return LangOpts; } - - const TokenBuffer &getTokenBuffer() const; llvm::BumpPtrAllocator &getAllocator() { return Allocator; } - -private: - /// Add \p Buffer to the underlying source manager, tokenize it and store the - /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens - /// that were not written in user code. - std::pair> - lexBuffer(std::unique_ptr Buffer); - friend class FactoryImpl; - private: - SourceManager &SourceMgr; - const LangOptions &LangOpts; - const TokenBuffer &Tokens; - /// IDs and storage for additional tokenized files. - llvm::DenseMap> ExtraTokens; /// Keeps all the allocated nodes and their intermediate data structures. llvm::BumpPtrAllocator Allocator; }; @@ -122,9 +98,9 @@ Node *getPreviousSibling() { return PreviousSibling; } /// Dumps the structure of a subtree. For debugging and testing purposes. - std::string dump(const SourceManager &SM) const; + std::string dump(const TokenManager &SM) const; /// Dumps the tokens forming this subtree. - std::string dumpTokens(const SourceManager &SM) const; + std::string dumpTokens(const TokenManager &SM) const; /// Asserts invariants on this node of the tree and its immediate children. /// Will not recurse into the subtree. No-op if NDEBUG is set. @@ -153,16 +129,17 @@ unsigned CanModify : 1; }; -/// A leaf node points to a single token inside the expanded token stream. +/// A leaf node points to a single token. +// FIXME: add TokenKind field (borrow some bits from the Node::kind). 
class Leaf final : public Node { public: - Leaf(const Token *T); + Leaf(TokenManager::Key K); static bool classof(const Node *N); - const Token *getToken() const { return Tok; } + TokenManager::Key getTokenKey() const { return K; } private: - const Token *Tok; + TokenManager::Key K; }; /// A node that has children and represents a syntactic language construct. diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -27,6 +27,7 @@ #include "clang/Lex/Lexer.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" @@ -365,21 +366,24 @@ /// Call finalize() to finish building the tree and consume the root node. class syntax::TreeBuilder { public: - TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { - for (const auto &T : Arena.getTokenBuffer().expandedTokens()) + TreeBuilder(syntax::Arena &Arena, TokenBufferTokenManager& TBTM) + : Arena(Arena), + TBTM(TBTM), + Pending(Arena, TBTM.tokenBuffer()) { + for (const auto &T : TBTM.tokenBuffer().expandedTokens()) LocationToToken.insert({T.location(), &T}); } llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } const SourceManager &sourceManager() const { - return Arena.getSourceManager(); + return TBTM.sourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. 
void foldNode(ArrayRef Range, syntax::Tree *New, ASTPtr From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); } @@ -392,7 +396,7 @@ void foldNode(llvm::ArrayRef Range, syntax::Tree *New, NestedNameSpecifierLoc From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); } @@ -403,7 +407,7 @@ ASTPtr From) { assert(New); auto ListRange = Pending.shrinkToFitList(SuperRange); - Pending.foldChildren(Arena, ListRange, New); + Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New); if (From) Mapping.add(From, New); } @@ -434,12 +438,12 @@ /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { - auto Tokens = Arena.getTokenBuffer().expandedTokens(); + auto Tokens = TBTM.tokenBuffer().expandedTokens(); assert(!Tokens.empty()); assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Arena, Tokens.drop_back(), + Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(), new (Arena.getAllocator()) syntax::TranslationUnit); auto *TU = cast(std::move(Pending).finalize()); @@ -464,7 +468,7 @@ assert(First.isValid()); assert(Last.isValid()); assert(First == Last || - Arena.getSourceManager().isBeforeInTranslationUnit(First, Last)); + TBTM.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } @@ -564,15 +568,16 @@ /// /// Ensures that added nodes properly nest and cover the whole token stream. 
struct Forest { - Forest(syntax::Arena &A) { - assert(!A.getTokenBuffer().expandedTokens().empty()); - assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); + Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) { + assert(!TB.expandedTokens().empty()); + assert(TB.expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { - auto *L = new (A.getAllocator()) syntax::Leaf(&T); + for (const auto &T : TB.expandedTokens().drop_back()) { + auto *L = new (A.getAllocator()) + syntax::Leaf(reinterpret_cast(&T)); L->Original = true; - L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value(); + L->CanModify = TB.spelledForExpanded(T).has_value(); Trees.insert(Trees.end(), {&T, L}); } } @@ -620,8 +625,8 @@ } /// Add \p Node to the forest and attach child nodes based on \p Tokens. - void foldChildren(const syntax::Arena &A, ArrayRef Tokens, - syntax::Tree *Node) { + void foldChildren(const syntax::TokenBuffer &TB, + ArrayRef Tokens, syntax::Tree *Node) { // Attach children to `Node`. assert(Node->getFirstChild() == nullptr && "node already has children"); @@ -646,7 +651,7 @@ // Mark that this node came from the AST and is backed by the source code. Node->Original = true; Node->CanModify = - A.getTokenBuffer().spelledForExpanded(Tokens).has_value(); + TB.spelledForExpanded(Tokens).has_value(); Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, Node}); @@ -660,18 +665,18 @@ return Root; } - std::string str(const syntax::Arena &A) const { + std::string str(const syntax::TokenBufferTokenManager &STM) const { std::string R; for (auto It = Trees.begin(); It != Trees.end(); ++It) { unsigned CoveredTokens = It != Trees.end() ? 
(std::next(It)->first - It->first) - : A.getTokenBuffer().expandedTokens().end() - It->first; + : STM.tokenBuffer().expandedTokens().end() - It->first; R += std::string( formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), - It->first->text(A.getSourceManager()), CoveredTokens)); - R += It->second->dump(A.getSourceManager()); + It->first->text(STM.sourceManager()), CoveredTokens)); + R += It->second->dump(STM); } return R; } @@ -684,9 +689,10 @@ }; /// For debugging purposes. - std::string str() { return Pending.str(Arena); } + std::string str() { return Pending.str(TBTM); } syntax::Arena &Arena; + TokenBufferTokenManager& TBTM; /// To quickly find tokens by their start location. llvm::DenseMap LocationToToken; Forest Pending; @@ -1718,7 +1724,7 @@ markExprChild(ChildExpr, NodeRole::Expression); ChildNode = new (allocator()) syntax::ExpressionStatement; // (!) 'getStmtRange()' ensures this covers a trailing semicolon. - Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode); } else { ChildNode = Mapping.find(Child); } @@ -1745,8 +1751,9 @@ } syntax::TranslationUnit *syntax::buildSyntaxTree(Arena &A, + TokenBufferTokenManager& TBTM, ASTContext &Context) { - TreeBuilder Builder(A); + TreeBuilder Builder(A, TBTM); BuildTreeVisitor(Context, Builder).TraverseAST(Context); return std::move(Builder).finalize(); } diff --git a/clang/lib/Tooling/Syntax/CMakeLists.txt b/clang/lib/Tooling/Syntax/CMakeLists.txt --- a/clang/lib/Tooling/Syntax/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/CMakeLists.txt @@ -5,6 +5,7 @@ ComputeReplacements.cpp Nodes.cpp Mutations.cpp + TokenBufferTokenManager.cpp Synthesis.cpp Tokens.cpp Tree.cpp diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp --- a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -7,7 +7,9 @@ 
//===----------------------------------------------------------------------===// #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" #include "llvm/Support/Error.h" using namespace clang; @@ -16,10 +18,13 @@ using ProcessTokensFn = llvm::function_ref, bool /*IsOriginal*/)>; /// Enumerates spans of tokens from the tree consecutively laid out in memory. -void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { +void enumerateTokenSpans(const syntax::Tree *Root, + const syntax::TokenBufferTokenManager &STM, + ProcessTokensFn Callback) { struct Enumerator { - Enumerator(ProcessTokensFn Callback) - : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Enumerator(const syntax::TokenBufferTokenManager &STM, + ProcessTokensFn Callback) + : STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), Callback(Callback) {} void run(const syntax::Tree *Root) { @@ -39,7 +44,8 @@ } auto *L = cast(N); - if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) { + if (SpanEnd == STM.getToken(L->getTokenKey()) && + SpanIsOriginal == L->isOriginal()) { // Extend the current span. ++SpanEnd; return; @@ -48,24 +54,25 @@ if (SpanBegin) Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); // Start recording a new span. 
- SpanBegin = L->getToken(); + SpanBegin = STM.getToken(L->getTokenKey()); SpanEnd = SpanBegin + 1; SpanIsOriginal = L->isOriginal(); } + const syntax::TokenBufferTokenManager &STM; const syntax::Token *SpanBegin; const syntax::Token *SpanEnd; bool SpanIsOriginal; ProcessTokensFn Callback; }; - return Enumerator(Callback).run(Root); + return Enumerator(STM, Callback).run(Root); } -syntax::FileRange rangeOfExpanded(const syntax::Arena &A, +syntax::FileRange rangeOfExpanded(const syntax::TokenBufferTokenManager &STM, llvm::ArrayRef Expanded) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = STM.tokenBuffer(); + const auto &SM = STM.sourceManager(); // Check that \p Expanded actually points into expanded tokens. assert(Buffer.expandedTokens().begin() <= Expanded.begin()); @@ -83,10 +90,10 @@ } // namespace tooling::Replacements -syntax::computeReplacements(const syntax::Arena &A, +syntax::computeReplacements(const TokenBufferTokenManager &TBTM, const syntax::TranslationUnit &TU) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = TBTM.tokenBuffer(); + const auto &SM = TBTM.sourceManager(); tooling::Replacements Replacements; // Text inserted by the replacement we are building now. 
@@ -95,13 +102,13 @@ if (ReplacedRange.empty() && Replacement.empty()) return; llvm::cantFail(Replacements.add(tooling::Replacement( - SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + SM, rangeOfExpanded(TBTM, ReplacedRange).toCharRange(SM), + Replacement))); Replacement = ""; }; - const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); enumerateTokenSpans( - &TU, [&](llvm::ArrayRef Tokens, bool IsOriginal) { + &TU, TBTM, [&](llvm::ArrayRef Tokens, bool IsOriginal) { if (!IsOriginal) { Replacement += syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); diff --git a/clang/lib/Tooling/Syntax/Mutations.cpp b/clang/lib/Tooling/Syntax/Mutations.cpp --- a/clang/lib/Tooling/Syntax/Mutations.cpp +++ b/clang/lib/Tooling/Syntax/Mutations.cpp @@ -77,7 +77,8 @@ } }; -void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) { +void syntax::removeStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM, + syntax::Statement *S) { assert(S); assert(S->canModify()); @@ -90,5 +91,5 @@ if (isa(S)) return; // already an empty statement, nothing to do. 
- MutationsImpl::replace(S, createEmptyStatement(A)); + MutationsImpl::replace(S, createEmptyStatement(A, TBTM)); } diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -8,6 +8,8 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Tree.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" using namespace clang; @@ -27,35 +29,40 @@ } static std::pair> - lexBuffer(syntax::Arena &A, std::unique_ptr Buffer) { - return A.lexBuffer(std::move(Buffer)); + lexBuffer(TokenBufferTokenManager &TBTM, + std::unique_ptr Buffer) { + return TBTM.lexBuffer(std::move(Buffer)); } }; // FIXME: `createLeaf` is based on `syntax::tokenize` internally, as such it // doesn't support digraphs or line continuations. -syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K, - StringRef Spelling) { +syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, + TokenBufferTokenManager &TBTM, + tok::TokenKind K, StringRef Spelling) { auto Tokens = - FactoryImpl::lexBuffer(A, llvm::MemoryBuffer::getMemBufferCopy(Spelling)) + FactoryImpl::lexBuffer(TBTM, llvm::MemoryBuffer::getMemBufferCopy(Spelling)) .second; assert(Tokens.size() == 1); assert(Tokens.front().kind() == K && "spelling is not lexed into the expected kind of token"); - auto *Leaf = new (A.getAllocator()) syntax::Leaf(Tokens.begin()); + auto *Leaf = new (A.getAllocator()) syntax::Leaf( + reinterpret_cast(Tokens.begin())); syntax::FactoryImpl::setCanModify(Leaf); Leaf->assertInvariants(); return Leaf; } -syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K) { +syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, + TokenBufferTokenManager &TBTM, + tok::TokenKind K) { const auto *Spelling = tok::getPunctuatorSpelling(K); if (!Spelling) Spelling = 
tok::getKeywordSpelling(K); assert(Spelling && "Cannot infer the spelling of the token from its token kind."); - return createLeaf(A, K, Spelling); + return createLeaf(A, TBTM, K, Spelling); } namespace { @@ -208,24 +215,25 @@ } syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A, + TokenBufferTokenManager &TBTM, const syntax::Node *N) { if (const auto *L = dyn_cast(N)) // `L->getToken()` gives us the expanded token, thus we implicitly expand // any macros here. - return createLeaf(A, L->getToken()->kind(), - L->getToken()->text(A.getSourceManager())); + return createLeaf(A, TBTM, TBTM.getToken(L->getTokenKey())->kind(), + TBTM.getText(L->getTokenKey())); const auto *T = cast(N); std::vector> Children; for (const auto *Child = T->getFirstChild(); Child; Child = Child->getNextSibling()) - Children.push_back({deepCopyExpandingMacros(A, Child), Child->getRole()}); + Children.push_back({deepCopyExpandingMacros(A, TBTM, Child), Child->getRole()}); return createTree(A, Children, N->getKind()); } -syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A) { +syntax::EmptyStatement *clang::syntax::createEmptyStatement(syntax::Arena &A, TokenBufferTokenManager &TBTM) { return cast( - createTree(A, {{createLeaf(A, tok::semi), NodeRole::Unknown}}, + createTree(A, {{createLeaf(A, TBTM, tok::semi), NodeRole::Unknown}}, NodeKind::EmptyStatement)); } diff --git a/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp b/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp @@ -0,0 +1,25 @@ +//===- TokenBufferTokenManager.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" + +namespace clang { +namespace syntax { +constexpr llvm::StringLiteral syntax::TokenBufferTokenManager::Kind; + +std::pair> +syntax::TokenBufferTokenManager::lexBuffer( + std::unique_ptr Input) { + auto FID = SM.createFileID(std::move(Input)); + auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SM, LangOpts)); + assert(It.second && "duplicate FileID"); + return {FID, It.first->second}; +} + +} // namespace syntax +} // namespace clang diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -33,25 +33,7 @@ } } // namespace -syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens) - : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} - -const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { - return Tokens; -} - -std::pair> -syntax::Arena::lexBuffer(std::unique_ptr Input) { - auto FID = SourceMgr.createFileID(std::move(Input)); - auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); - assert(It.second && "duplicate FileID"); - return {FID, It.first->second}; -} - -syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { - assert(Tok != nullptr); -} +syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {} syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), @@ -190,20 +172,8 @@ } namespace { -static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, - const SourceManager &SM) { - assert(L); - const auto *Token = L->getToken(); - assert(Token); - // Handle 'eof' separately, calling text() on it produces an empty string. 
- if (Token->kind() == tok::eof) - OS << ""; - else - OS << Token->text(SM); -} - static void dumpNode(raw_ostream &OS, const syntax::Node *N, - const SourceManager &SM, llvm::BitVector IndentMask) { + const syntax::TokenManager &TM, llvm::BitVector IndentMask) { auto DumpExtraInfo = [&OS](const syntax::Node *N) { if (N->getRole() != syntax::NodeRole::Unknown) OS << " " << N->getRole(); @@ -216,7 +186,7 @@ assert(N); if (const auto *L = dyn_cast(N)) { OS << "'"; - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << "'"; DumpExtraInfo(N); OS << "\n"; @@ -242,25 +212,25 @@ OS << "|-"; IndentMask.push_back(true); } - dumpNode(OS, &It, SM, IndentMask); + dumpNode(OS, &It, TM, IndentMask); IndentMask.pop_back(); } } } // namespace -std::string syntax::Node::dump(const SourceManager &SM) const { +std::string syntax::Node::dump(const TokenManager &TM) const { std::string Str; llvm::raw_string_ostream OS(Str); - dumpNode(OS, this, SM, /*IndentMask=*/{}); + dumpNode(OS, this, TM, /*IndentMask=*/{}); return std::move(OS.str()); } -std::string syntax::Node::dumpTokens(const SourceManager &SM) const { +std::string syntax::Node::dumpTokens(const TokenManager &TM) const { std::string Storage; llvm::raw_string_ostream OS(Storage); traverse(this, [&](const syntax::Node *N) { if (const auto *L = dyn_cast(N)) { - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << " "; } }); @@ -297,7 +267,8 @@ C.getRole() == NodeRole::ListDelimiter); if (C.getRole() == NodeRole::ListDelimiter) { assert(isa(C)); - assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); + // FIXME: re-enable it when there is a way to retrieve token kind in Leaf. 
+ // assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); } } diff --git a/clang/tools/clang-check/ClangCheck.cpp b/clang/tools/clang-check/ClangCheck.cpp --- a/clang/tools/clang-check/ClangCheck.cpp +++ b/clang/tools/clang-check/ClangCheck.cpp @@ -25,6 +25,7 @@ #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "clang/Tooling/Tooling.h" @@ -157,9 +158,11 @@ clang::syntax::TokenBuffer TB = std::move(Collector).consume(); if (TokensDump) llvm::outs() << TB.dumpForTests(); - clang::syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), TB); - llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump( - AST.getSourceManager()); + clang::syntax::TokenBufferTokenManager TBTM(TB, AST.getLangOpts(), + AST.getSourceManager()); + clang::syntax::Arena A; + llvm::outs() + << clang::syntax::buildSyntaxTree(A, TBTM, AST)->dump(TBTM); } private: diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -26,7 +26,7 @@ auto ErrorOK = errorOK(Code); if (!ErrorOK) return ErrorOK; - auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(*TM)).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. 
EXPECT_EQ(Tree.trim().str(), Actual); if (Actual != Tree.trim().str()) { @@ -59,7 +59,7 @@ auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); assert(AnnotatedNode); auto AnnotatedNodeDump = - StringRef(AnnotatedNode->dump(Arena->getSourceManager())) + StringRef(AnnotatedNode->dump(*TM)) .trim() .str(); // EXPECT_EQ shows the diff between the two strings if they are different. diff --git a/clang/unittests/Tooling/Syntax/MutationsTest.cpp b/clang/unittests/Tooling/Syntax/MutationsTest.cpp --- a/clang/unittests/Tooling/Syntax/MutationsTest.cpp +++ b/clang/unittests/Tooling/Syntax/MutationsTest.cpp @@ -30,7 +30,7 @@ Transform(Source, Root); - auto Replacements = syntax::computeReplacements(*Arena, *Root); + auto Replacements = syntax::computeReplacements(*TM, *Root); auto Output = tooling::applyAllReplacements(Source.code(), Replacements); if (!Output) { ADD_FAILURE() << "could not apply replacements: " @@ -47,7 +47,7 @@ TranslationUnit *Root) { auto *S = cast(nodeByRange(Input.range(), Root)); ASSERT_TRUE(S->canModify()) << "cannot remove a statement"; - syntax::removeStatement(*Arena, S); + syntax::removeStatement(*Arena, *TM, S); EXPECT_TRUE(S->isDetached()); EXPECT_FALSE(S->isOriginal()) << "node removed from tree cannot be marked as original"; diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -27,7 +27,7 @@ return ::testing::AssertionFailure() << "Root was not built successfully."; - auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(*TM)).trim().str(); auto Expected = Dump.trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. 
EXPECT_EQ(Expected, Actual); @@ -44,7 +44,7 @@ TEST_P(SynthesisTest, Leaf_Punctuation) { buildTree("", GetParam()); - auto *Leaf = createLeaf(*Arena, tok::comma); + auto *Leaf = createLeaf(*Arena, *TM, tok::comma); EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( ',' Detached synthesized @@ -57,7 +57,7 @@ buildTree("", GetParam()); - auto *Leaf = createLeaf(*Arena, tok::coloncolon); + auto *Leaf = createLeaf(*Arena, *TM, tok::coloncolon); EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( '::' Detached synthesized @@ -67,7 +67,7 @@ TEST_P(SynthesisTest, Leaf_Keyword) { buildTree("", GetParam()); - auto *Leaf = createLeaf(*Arena, tok::kw_if); + auto *Leaf = createLeaf(*Arena, *TM, tok::kw_if); EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( 'if' Detached synthesized @@ -80,7 +80,7 @@ buildTree("", GetParam()); - auto *Leaf = createLeaf(*Arena, tok::kw_nullptr); + auto *Leaf = createLeaf(*Arena, *TM, tok::kw_nullptr); EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( 'nullptr' Detached synthesized @@ -90,7 +90,7 @@ TEST_P(SynthesisTest, Leaf_Identifier) { buildTree("", GetParam()); - auto *Leaf = createLeaf(*Arena, tok::identifier, "a"); + auto *Leaf = createLeaf(*Arena, *TM, tok::identifier, "a"); EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( 'a' Detached synthesized @@ -100,7 +100,7 @@ TEST_P(SynthesisTest, Leaf_Number) { buildTree("", GetParam()); - auto *Leaf = createLeaf(*Arena, tok::numeric_constant, "1"); + auto *Leaf = createLeaf(*Arena, *TM, tok::numeric_constant, "1"); EXPECT_TRUE(treeDumpEqual(Leaf, R"txt( '1' Detached synthesized @@ -120,8 +120,8 @@ TEST_P(SynthesisTest, Tree_Flat) { buildTree("", GetParam()); - auto *LeafLParen = createLeaf(*Arena, tok::l_paren); - auto *LeafRParen = createLeaf(*Arena, tok::r_paren); + auto *LeafLParen = createLeaf(*Arena, *TM, tok::l_paren); + auto *LeafRParen = createLeaf(*Arena, *TM, tok::r_paren); auto *TreeParen = createTree(*Arena, {{LeafLParen, NodeRole::LeftHandSide}, {LeafRParen, NodeRole::RightHandSide}}, @@ -137,13 +137,13 @@ TEST_P(SynthesisTest, 
Tree_OfTree) { buildTree("", GetParam()); - auto *Leaf1 = createLeaf(*Arena, tok::numeric_constant, "1"); + auto *Leaf1 = createLeaf(*Arena, *TM, tok::numeric_constant, "1"); auto *Int1 = createTree(*Arena, {{Leaf1, NodeRole::LiteralToken}}, NodeKind::IntegerLiteralExpression); - auto *LeafPlus = createLeaf(*Arena, tok::plus); + auto *LeafPlus = createLeaf(*Arena, *TM, tok::plus); - auto *Leaf2 = createLeaf(*Arena, tok::numeric_constant, "2"); + auto *Leaf2 = createLeaf(*Arena, *TM, tok::numeric_constant, "2"); auto *Int2 = createTree(*Arena, {{Leaf2, NodeRole::LiteralToken}}, NodeKind::IntegerLiteralExpression); @@ -166,16 +166,15 @@ TEST_P(SynthesisTest, DeepCopy_Synthesized) { buildTree("", GetParam()); - auto *LeafContinue = createLeaf(*Arena, tok::kw_continue); - auto *LeafSemiColon = createLeaf(*Arena, tok::semi); + auto *LeafContinue = createLeaf(*Arena, *TM, tok::kw_continue); + auto *LeafSemiColon = createLeaf(*Arena, *TM, tok::semi); auto *StatementContinue = createTree(*Arena, {{LeafContinue, NodeRole::LiteralToken}, {LeafSemiColon, NodeRole::Unknown}}, NodeKind::ContinueStatement); - auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue); - EXPECT_TRUE( - treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager()))); + auto *Copy = deepCopyExpandingMacros(*Arena, *TM, StatementContinue); + EXPECT_TRUE(treeDumpEqual(Copy, StatementContinue->dump(*TM))); // FIXME: Test that copy is independent of original, once the Mutations API is // more developed. 
} @@ -183,7 +182,7 @@ TEST_P(SynthesisTest, DeepCopy_Original) { auto *OriginalTree = buildTree("int a;", GetParam()); - auto *Copy = deepCopyExpandingMacros(*Arena, OriginalTree); + auto *Copy = deepCopyExpandingMacros(*Arena, *TM, OriginalTree); EXPECT_TRUE(treeDumpEqual(Copy, R"txt( TranslationUnit Detached synthesized `-SimpleDeclaration synthesized @@ -198,7 +197,8 @@ TEST_P(SynthesisTest, DeepCopy_Child) { auto *OriginalTree = buildTree("int a;", GetParam()); - auto *Copy = deepCopyExpandingMacros(*Arena, OriginalTree->getFirstChild()); + auto *Copy = + deepCopyExpandingMacros(*Arena, *TM, OriginalTree->getFirstChild()); EXPECT_TRUE(treeDumpEqual(Copy, R"txt( SimpleDeclaration Detached synthesized |-'int' synthesized @@ -218,7 +218,7 @@ })cpp", GetParam()); - auto *Copy = deepCopyExpandingMacros(*Arena, OriginalTree); + auto *Copy = deepCopyExpandingMacros(*Arena, *TM, OriginalTree); // The syntax tree stores already expanded Tokens, we can only see whether the // macro was expanded when computing replacements. 
The dump does show that @@ -260,7 +260,7 @@ TEST_P(SynthesisTest, Statement_EmptyStatement) { buildTree("", GetParam()); - auto *S = createEmptyStatement(*Arena); + auto *S = createEmptyStatement(*Arena, *TM); EXPECT_TRUE(treeDumpEqual(S, R"txt( EmptyStatement Detached synthesized `-';' synthesized diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -27,7 +27,7 @@ ChildrenWithRoles.reserve(Children.size()); for (const auto *Child : Children) { ChildrenWithRoles.push_back(std::make_pair( - deepCopyExpandingMacros(*Arena, Child), NodeRole::Unknown)); + deepCopyExpandingMacros(*Arena, *TM, Child), NodeRole::Unknown)); } return clang::syntax::createTree(*Arena, ChildrenWithRoles, NodeKind::UnknownExpression); @@ -108,29 +108,29 @@ TEST_P(TreeTest, FirstLeaf) { buildTree("", GetParam()); - std::vector Leafs = {createLeaf(*Arena, tok::l_paren), - createLeaf(*Arena, tok::r_paren)}; + std::vector Leafs = {createLeaf(*Arena, *TM, tok::l_paren), + createLeaf(*Arena, *TM, tok::r_paren)}; for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) { ASSERT_TRUE(Tree->findFirstLeaf() != nullptr); - EXPECT_EQ(Tree->findFirstLeaf()->getToken()->kind(), tok::l_paren); + EXPECT_EQ(TM->getToken(Tree->findFirstLeaf()->getTokenKey())->kind(), tok::l_paren); } } TEST_P(TreeTest, LastLeaf) { buildTree("", GetParam()); - std::vector Leafs = {createLeaf(*Arena, tok::l_paren), - createLeaf(*Arena, tok::r_paren)}; + std::vector Leafs = {createLeaf(*Arena, *TM, tok::l_paren), + createLeaf(*Arena, *TM, tok::r_paren)}; for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) { ASSERT_TRUE(Tree->findLastLeaf() != nullptr); - EXPECT_EQ(Tree->findLastLeaf()->getToken()->kind(), tok::r_paren); + EXPECT_EQ(TM->getToken(Tree->findLastLeaf()->getTokenKey())->kind(), tok::r_paren); } } TEST_F(TreeTest, Iterators) { buildTree("", 
allTestClangConfigs().front()); - std::vector Children = {createLeaf(*Arena, tok::identifier, "a"), - createLeaf(*Arena, tok::identifier, "b"), - createLeaf(*Arena, tok::identifier, "c")}; + std::vector Children = {createLeaf(*Arena, *TM, tok::identifier, "a"), + createLeaf(*Arena, *TM, tok::identifier, "b"), + createLeaf(*Arena, *TM, tok::identifier, "c")}; auto *Tree = syntax::createTree(*Arena, {{Children[0], NodeRole::LeftHandSide}, {Children[1], NodeRole::OperatorToken}, @@ -180,7 +180,7 @@ private: std::string dumpQuotedTokensOrNull(const Node *N) { return N ? "'" + - StringRef(N->dumpTokens(Arena->getSourceManager())) + StringRef(N->dumpTokens(*TM)) .trim() .str() + "'" @@ -233,11 +233,11 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, }, NodeKind::CallArguments)); @@ -254,10 +254,10 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::comma), 
NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, }, NodeKind::CallArguments)); @@ -274,10 +274,10 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, }, NodeKind::CallArguments)); @@ -294,10 +294,10 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::comma), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::comma), NodeRole::ListDelimiter}, }, NodeKind::CallArguments)); @@ -317,12 +317,12 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + 
{createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, }, NodeKind::NestedNameSpecifier)); @@ -342,11 +342,11 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, }, NodeKind::NestedNameSpecifier)); @@ -366,11 +366,11 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, }, 
NodeKind::NestedNameSpecifier)); @@ -390,11 +390,11 @@ auto *List = dyn_cast(syntax::createTree( *Arena, { - {createLeaf(*Arena, tok::identifier, "a"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "b"), NodeRole::ListElement}, - {createLeaf(*Arena, tok::coloncolon), NodeRole::ListDelimiter}, - {createLeaf(*Arena, tok::identifier, "c"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::identifier, "a"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "b"), NodeRole::ListElement}, + {createLeaf(*Arena, *TM, tok::coloncolon), NodeRole::ListDelimiter}, + {createLeaf(*Arena, *TM, tok::identifier, "c"), NodeRole::ListElement}, }, NodeKind::NestedNameSpecifier)); diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.h b/clang/unittests/Tooling/Syntax/TreeTestBase.h --- a/clang/unittests/Tooling/Syntax/TreeTestBase.h +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -17,6 +17,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/StringRef.h" @@ -51,6 +52,7 @@ std::shared_ptr Invocation; // Set after calling buildTree(). 
std::unique_ptr TB; + std::unique_ptr TM; std::unique_ptr Arena; }; diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,14 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, + const TokenBufferTokenManager &STM) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) - return llvm::makeArrayRef(L->getToken(), 1); + return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), - T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), + STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -70,23 +71,26 @@ public: BuildSyntaxTree(syntax::TranslationUnit *&Root, std::unique_ptr &TB, + std::unique_ptr &TM, std::unique_ptr &Arena, std::unique_ptr Tokens) - : Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { + : Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext &Ctx) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. 
- Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); - Root = syntax::buildSyntaxTree(*Arena, Ctx); + TM = std::make_unique( + *TB, Ctx.getLangOpts(), Ctx.getSourceManager()); + Arena = std::make_unique(); + Root = syntax::buildSyntaxTree(*Arena, *TM, Ctx); } private: syntax::TranslationUnit *&Root; std::unique_ptr &TB; + std::unique_ptr &TM; std::unique_ptr &Arena; std::unique_ptr Tokens; }; @@ -94,21 +98,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *&Root, + std::unique_ptr &TM, std::unique_ptr &TB, std::unique_ptr &Arena) - : Root(Root), TB(TB), Arena(Arena) {} + : Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TB, TM, Arena, std::move(Tokens)); } private: syntax::TranslationUnit *&Root; + std::unique_ptr &TM; std::unique_ptr &TB; std::unique_ptr &Arena; }; @@ -149,7 +155,7 @@ Compiler.setSourceManager(SourceMgr.get()); syntax::TranslationUnit *Root = nullptr; - BuildSyntaxTreeAction Recorder(Root, this->TB, this->Arena); + BuildSyntaxTreeAction Recorder(Root, this->TM, this->TB, this->Arena); // Action could not be executed but the frontend didn't identify any errors // in the code ==> problem in setting up the action. @@ -163,7 +169,7 @@ syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { - ArrayRef Toks = tokens(Root); + ArrayRef Toks = tokens(Root, *TM); if (Toks.front().location().isFileID() && Toks.back().location().isFileID() && syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==