diff --git a/clang/include/clang/Tooling/Syntax/Mutations.h b/clang/include/clang/Tooling/Syntax/Mutations.h --- a/clang/include/clang/Tooling/Syntax/Mutations.h +++ b/clang/include/clang/Tooling/Syntax/Mutations.h @@ -13,6 +13,7 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tree.h" namespace clang { @@ -20,7 +21,7 @@ /// Computes textual replacements required to mimic the tree modifications made /// to the syntax tree. -tooling::Replacements computeReplacements(const Arena &A, +tooling::Replacements computeReplacements(const TokenBufferTokenManager &TBTM, const syntax::TranslationUnit &TU); /// Removes a statement or replaces it with an empty statement where one is diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -21,9 +21,7 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_NODES_H #define LLVM_CLANG_TOOLING_SYNTAX_NODES_H -#include "clang/Basic/TokenKinds.h" -#include "clang/Lex/Token.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Basic/LLVM.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" diff --git a/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h b/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenBufferTokenManager.h @@ -0,0 +1,70 @@ +//===- TokenBufferTokenManager.h -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H + +#include "clang/Tooling/Syntax/TokenManager.h" +#include "clang/Tooling/Syntax/Tokens.h" + +namespace clang { +namespace syntax { + +/// A TokenBuffer-powered token manager. +/// It tracks the underlying token buffers, source manager, etc. +class TokenBufferTokenManager : public TokenManager { +public: + TokenBufferTokenManager(const TokenBuffer &Tokens, + const LangOptions &LangOpts, SourceManager &SourceMgr) + : Tokens(Tokens), LangOpts(LangOpts), SM(SourceMgr) {} + + static bool classof(const TokenManager *N) { return N->kind() == Kind; } + llvm::StringLiteral kind() const override { return Kind; } + + llvm::StringRef getText(Key I) const override { + const auto *Token = getToken(I); + assert(Token); + // Handle 'eof' separately, calling text() on it produces an empty string. + // FIXME: this special logic is for syntax::Leaf dump, move it when we + // have a direct way to retrieve token kind in the syntax::Leaf. + if (Token->kind() == tok::eof) + return "<eof>"; + return Token->text(SM); + } + + const syntax::Token *getToken(Key I) const { + return reinterpret_cast<const syntax::Token *>(I); + } + SourceManager &sourceManager() { return SM; } + const SourceManager &sourceManager() const { return SM; } + const TokenBuffer &tokenBuffer() const { return Tokens; } + +private: + // This manager is powered by the TokenBuffer. + static constexpr llvm::StringLiteral Kind = "TokenBuffer"; + + /// Add \p Buffer to the underlying source manager, tokenize it and store the + /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens + /// that were not written in user code. 
+ std::pair<FileID, ArrayRef<Token>> + lexBuffer(std::unique_ptr<llvm::MemoryBuffer> Buffer); + friend class FactoryImpl; + + const TokenBuffer &Tokens; + const LangOptions &LangOpts; + + /// The underlying source manager for the ExtraTokens. + SourceManager &SM; + /// IDs and storage for additional tokenized files. + llvm::DenseMap<FileID, std::vector<Token>> ExtraTokens; +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_BUFFER_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/TokenManager.h b/clang/include/clang/Tooling/Syntax/TokenManager.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenManager.h @@ -0,0 +1,48 @@ +//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines Token interfaces for the clang syntax-tree. This is the level of +// abstraction that the syntax-tree uses to operate on Token. +// +// TokenManager decouples the syntax-tree from a particular token +// implementation. For example, a TokenBuffer captured from a clang parser may +// track macro expansions and associate tokens with clang's SourceManager, while +// a clang pseudoparser would use a flat array of raw-lexed tokens in memory. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H + +#include "llvm/ADT/StringRef.h" +#include <cstdint> + +namespace clang { +namespace syntax { + +/// Defines interfaces for operating "Token" in the clang syntax-tree. +class TokenManager { +public: + virtual ~TokenManager() = default; + + /// Describes what the exact class kind of the TokenManager is. 
+ virtual llvm::StringLiteral kind() const = 0; + + /// A key to identify a specific token. The token concept depends on the + /// underlying implementation -- it can be a spelled token from the original + /// source file or an expanded token. + /// The syntax-tree Leaf node holds a Key. + using Key = uintptr_t; + /// Gets the text of token identified by the key. + virtual llvm::StringRef getText(Key K) const = 0; +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -33,6 +33,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // Defines the basic structure of the syntax tree. There are two kinds of nodes: -// - leaf nodes correspond to a token in the expanded token stream, +// - leaf nodes correspond to tokens, // - tree nodes correspond to language grammar constructs. // // The tree is initially built from an AST. 
Each node of a newly built tree @@ -21,11 +21,8 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/iterator.h" @@ -36,33 +33,17 @@ namespace clang { namespace syntax { -/// A memory arena for syntax trees. Also tracks the underlying token buffers, -/// source manager, etc. +/// A memory arena for syntax trees. class Arena { public: - Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens); - - const SourceManager &getSourceManager() const { return SourceMgr; } - const LangOptions &getLangOptions() const { return LangOpts; } - - const TokenBuffer &getTokenBuffer() const; + Arena(TokenManager& TokenMgr) : TokenMgr(TokenMgr) {} llvm::BumpPtrAllocator &getAllocator() { return Allocator; } -private: - /// Add \p Buffer to the underlying source manager, tokenize it and store the - /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens - /// that were not written in user code. - std::pair> - lexBuffer(std::unique_ptr Buffer); - friend class FactoryImpl; + const TokenManager &getTokenManager() const { return TokenMgr; } + TokenManager &getTokenManager() { return TokenMgr; } private: - SourceManager &SourceMgr; - const LangOptions &LangOpts; - const TokenBuffer &Tokens; - /// IDs and storage for additional tokenized files. - llvm::DenseMap> ExtraTokens; + TokenManager& TokenMgr; /// Keeps all the allocated nodes and their intermediate data structures. llvm::BumpPtrAllocator Allocator; }; @@ -122,9 +103,9 @@ Node *getPreviousSibling() { return PreviousSibling; } /// Dumps the structure of a subtree. For debugging and testing purposes. 
- std::string dump(const SourceManager &SM) const; + std::string dump(const TokenManager &TM) const; /// Dumps the tokens forming this subtree. - std::string dumpTokens(const SourceManager &SM) const; + std::string dumpTokens(const TokenManager &TM) const; /// Asserts invariants on this node of the tree and its immediate children. /// Will not recurse into the subtree. No-op if NDEBUG is set. @@ -153,16 +134,17 @@ unsigned CanModify : 1; }; -/// A leaf node points to a single token inside the expanded token stream. +/// A leaf node points to a single token. +// FIXME: add TokenKind field (borrow some bits from the Node::kind). class Leaf final : public Node { public: - Leaf(const Token *T); + explicit Leaf(TokenManager::Key K); static bool classof(const Node *N); - const Token *getToken() const { return Tok; } + TokenManager::Key getTokenKey() const { return K; } private: - const Token *Tok; + TokenManager::Key K; }; /// A node that has children and represents a syntactic language construct. diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -27,6 +27,7 @@ #include "clang/Lex/Lexer.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" @@ -365,21 +366,24 @@ /// Call finalize() to finish building the tree and consume the root node. 
class syntax::TreeBuilder { public: - TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { - for (const auto &T : Arena.getTokenBuffer().expandedTokens()) + TreeBuilder(syntax::Arena &Arena) + : Arena(Arena), + TBTM(cast(Arena.getTokenManager())), + Pending(Arena, TBTM.tokenBuffer()) { + for (const auto &T : TBTM.tokenBuffer().expandedTokens()) LocationToToken.insert({T.location(), &T}); } llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } const SourceManager &sourceManager() const { - return Arena.getSourceManager(); + return TBTM.sourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. void foldNode(ArrayRef Range, syntax::Tree *New, ASTPtr From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); } @@ -392,7 +396,7 @@ void foldNode(llvm::ArrayRef Range, syntax::Tree *New, NestedNameSpecifierLoc From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(TBTM.tokenBuffer(), Range, New); if (From) Mapping.add(From, New); } @@ -403,7 +407,7 @@ ASTPtr From) { assert(New); auto ListRange = Pending.shrinkToFitList(SuperRange); - Pending.foldChildren(Arena, ListRange, New); + Pending.foldChildren(TBTM.tokenBuffer(), ListRange, New); if (From) Mapping.add(From, New); } @@ -434,12 +438,12 @@ /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { - auto Tokens = Arena.getTokenBuffer().expandedTokens(); + auto Tokens = TBTM.tokenBuffer().expandedTokens(); assert(!Tokens.empty()); assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. 
- Pending.foldChildren(Arena, Tokens.drop_back(), + Pending.foldChildren(TBTM.tokenBuffer(), Tokens.drop_back(), new (Arena.getAllocator()) syntax::TranslationUnit); auto *TU = cast(std::move(Pending).finalize()); @@ -464,7 +468,7 @@ assert(First.isValid()); assert(Last.isValid()); assert(First == Last || - Arena.getSourceManager().isBeforeInTranslationUnit(First, Last)); + TBTM.sourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } @@ -564,15 +568,16 @@ /// /// Ensures that added nodes properly nest and cover the whole token stream. struct Forest { - Forest(syntax::Arena &A) { - assert(!A.getTokenBuffer().expandedTokens().empty()); - assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); + Forest(syntax::Arena &A, const syntax::TokenBuffer &TB) { + assert(!TB.expandedTokens().empty()); + assert(TB.expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { - auto *L = new (A.getAllocator()) syntax::Leaf(&T); + for (const auto &T : TB.expandedTokens().drop_back()) { + auto *L = new (A.getAllocator()) + syntax::Leaf(reinterpret_cast(&T)); L->Original = true; - L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value(); + L->CanModify = TB.spelledForExpanded(T).has_value(); Trees.insert(Trees.end(), {&T, L}); } } @@ -620,8 +625,8 @@ } /// Add \p Node to the forest and attach child nodes based on \p Tokens. - void foldChildren(const syntax::Arena &A, ArrayRef Tokens, - syntax::Tree *Node) { + void foldChildren(const syntax::TokenBuffer &TB, + ArrayRef Tokens, syntax::Tree *Node) { // Attach children to `Node`. assert(Node->getFirstChild() == nullptr && "node already has children"); @@ -646,7 +651,7 @@ // Mark that this node came from the AST and is backed by the source code. 
Node->Original = true; Node->CanModify = - A.getTokenBuffer().spelledForExpanded(Tokens).has_value(); + TB.spelledForExpanded(Tokens).has_value(); Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, Node}); @@ -660,18 +665,18 @@ return Root; } - std::string str(const syntax::Arena &A) const { + std::string str(const syntax::TokenBufferTokenManager &STM) const { std::string R; for (auto It = Trees.begin(); It != Trees.end(); ++It) { unsigned CoveredTokens = - It != Trees.end() + std::next(It) != Trees.end() ? (std::next(It)->first - It->first) - : A.getTokenBuffer().expandedTokens().end() - It->first; + : STM.tokenBuffer().expandedTokens().end() - It->first; R += std::string( formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), - It->first->text(A.getSourceManager()), CoveredTokens)); - R += It->second->dump(A.getSourceManager()); + It->first->text(STM.sourceManager()), CoveredTokens)); + R += It->second->dump(STM); } return R; } @@ -684,9 +689,10 @@ }; /// For debugging purposes. - std::string str() { return Pending.str(Arena); } + std::string str() { return Pending.str(TBTM); } syntax::Arena &Arena; + TokenBufferTokenManager& TBTM; /// To quickly find tokens by their start location. llvm::DenseMap<SourceLocation, const syntax::Token *> LocationToToken; Forest Pending; @@ -1718,7 +1724,7 @@ markExprChild(ChildExpr, NodeRole::Expression); ChildNode = new (allocator()) syntax::ExpressionStatement; // (!) 'getStmtRange()' ensures this covers a trailing semicolon. 
- Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + Pending.foldChildren(TBTM.tokenBuffer(), getStmtRange(Child), ChildNode); } else { ChildNode = Mapping.find(Child); } diff --git a/clang/lib/Tooling/Syntax/CMakeLists.txt b/clang/lib/Tooling/Syntax/CMakeLists.txt --- a/clang/lib/Tooling/Syntax/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/CMakeLists.txt @@ -5,6 +5,7 @@ ComputeReplacements.cpp Nodes.cpp Mutations.cpp + TokenBufferTokenManager.cpp Synthesis.cpp Tokens.cpp Tree.cpp diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp --- a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -7,7 +7,9 @@ //===----------------------------------------------------------------------===// #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Mutations.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" #include "llvm/Support/Error.h" using namespace clang; @@ -16,10 +18,13 @@ using ProcessTokensFn = llvm::function_ref, bool /*IsOriginal*/)>; /// Enumerates spans of tokens from the tree consecutively laid out in memory. 
-void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { +void enumerateTokenSpans(const syntax::Tree *Root, + const syntax::TokenBufferTokenManager &STM, + ProcessTokensFn Callback) { struct Enumerator { - Enumerator(ProcessTokensFn Callback) - : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Enumerator(const syntax::TokenBufferTokenManager &STM, + ProcessTokensFn Callback) + : STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), Callback(Callback) {} void run(const syntax::Tree *Root) { @@ -39,7 +44,8 @@ } auto *L = cast(N); - if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) { + if (SpanEnd == STM.getToken(L->getTokenKey()) && + SpanIsOriginal == L->isOriginal()) { // Extend the current span. ++SpanEnd; return; @@ -48,24 +54,25 @@ if (SpanBegin) Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); // Start recording a new span. - SpanBegin = L->getToken(); + SpanBegin = STM.getToken(L->getTokenKey()); SpanEnd = SpanBegin + 1; SpanIsOriginal = L->isOriginal(); } + const syntax::TokenBufferTokenManager &STM; const syntax::Token *SpanBegin; const syntax::Token *SpanEnd; bool SpanIsOriginal; ProcessTokensFn Callback; }; - return Enumerator(Callback).run(Root); + return Enumerator(STM, Callback).run(Root); } -syntax::FileRange rangeOfExpanded(const syntax::Arena &A, +syntax::FileRange rangeOfExpanded(const syntax::TokenBufferTokenManager &STM, llvm::ArrayRef Expanded) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = STM.tokenBuffer(); + const auto &SM = STM.sourceManager(); // Check that \p Expanded actually points into expanded tokens. 
assert(Buffer.expandedTokens().begin() <= Expanded.begin()); @@ -83,10 +90,10 @@ } // namespace tooling::Replacements -syntax::computeReplacements(const syntax::Arena &A, +syntax::computeReplacements(const TokenBufferTokenManager &TBTM, const syntax::TranslationUnit &TU) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = TBTM.tokenBuffer(); + const auto &SM = TBTM.sourceManager(); tooling::Replacements Replacements; // Text inserted by the replacement we are building now. @@ -95,13 +102,13 @@ if (ReplacedRange.empty() && Replacement.empty()) return; llvm::cantFail(Replacements.add(tooling::Replacement( - SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + SM, rangeOfExpanded(TBTM, ReplacedRange).toCharRange(SM), + Replacement))); Replacement = ""; }; - const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); enumerateTokenSpans( - &TU, [&](llvm::ArrayRef Tokens, bool IsOriginal) { + &TU, TBTM, [&](llvm::ArrayRef Tokens, bool IsOriginal) { if (!IsOriginal) { Replacement += syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -8,6 +8,8 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Tree.h" +#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" using namespace clang; @@ -28,7 +30,8 @@ static std::pair> lexBuffer(syntax::Arena &A, std::unique_ptr Buffer) { - return A.lexBuffer(std::move(Buffer)); + auto& STM = llvm::cast(A.getTokenManager()); + return STM.lexBuffer(std::move(Buffer)); } }; @@ -43,7 +46,8 @@ assert(Tokens.front().kind() == K && "spelling is not lexed into the expected kind of token"); - auto *Leaf = new (A.getAllocator()) syntax::Leaf(Tokens.begin()); + auto 
*Leaf = new (A.getAllocator()) syntax::Leaf( + reinterpret_cast(Tokens.begin())); syntax::FactoryImpl::setCanModify(Leaf); Leaf->assertInvariants(); return Leaf; @@ -209,11 +213,12 @@ syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A, const syntax::Node *N) { + const auto& TBTM = llvm::cast(A.getTokenManager()); if (const auto *L = dyn_cast(N)) // `L->getToken()` gives us the expanded token, thus we implicitly expand // any macros here. - return createLeaf(A, L->getToken()->kind(), - L->getToken()->text(A.getSourceManager())); + return createLeaf(A, TBTM.getToken(L->getTokenKey())->kind(), + TBTM.getText(L->getTokenKey())); const auto *T = cast(N); std::vector> Children; diff --git a/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp b/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/TokenBufferTokenManager.cpp @@ -0,0 +1,25 @@ +//===- TokenBufferTokenManager.cpp ----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" + +namespace clang { +namespace syntax { +constexpr llvm::StringLiteral syntax::TokenBufferTokenManager::Kind; + +std::pair> +syntax::TokenBufferTokenManager::lexBuffer( + std::unique_ptr Input) { + auto FID = SM.createFileID(std::move(Input)); + auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SM, LangOpts)); + assert(It.second && "duplicate FileID"); + return {FID, It.first->second}; +} + +} // namespace syntax +} // namespace clang diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -33,25 +33,7 @@ } } // namespace -syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens) - : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} - -const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { - return Tokens; -} - -std::pair> -syntax::Arena::lexBuffer(std::unique_ptr Input) { - auto FID = SourceMgr.createFileID(std::move(Input)); - auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); - assert(It.second && "duplicate FileID"); - return {FID, It.first->second}; -} - -syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { - assert(Tok != nullptr); -} +syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {} syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), @@ -190,20 +172,8 @@ } namespace { -static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, - const SourceManager &SM) { - assert(L); - const auto *Token = L->getToken(); - assert(Token); - // Handle 'eof' separately, calling text() on it produces an empty string. 
- if (Token->kind() == tok::eof) - OS << ""; - else - OS << Token->text(SM); -} - static void dumpNode(raw_ostream &OS, const syntax::Node *N, - const SourceManager &SM, llvm::BitVector IndentMask) { + const syntax::TokenManager &TM, llvm::BitVector IndentMask) { auto DumpExtraInfo = [&OS](const syntax::Node *N) { if (N->getRole() != syntax::NodeRole::Unknown) OS << " " << N->getRole(); @@ -216,7 +186,7 @@ assert(N); if (const auto *L = dyn_cast(N)) { OS << "'"; - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << "'"; DumpExtraInfo(N); OS << "\n"; @@ -242,25 +212,25 @@ OS << "|-"; IndentMask.push_back(true); } - dumpNode(OS, &It, SM, IndentMask); + dumpNode(OS, &It, TM, IndentMask); IndentMask.pop_back(); } } } // namespace -std::string syntax::Node::dump(const SourceManager &SM) const { +std::string syntax::Node::dump(const TokenManager &TM) const { std::string Str; llvm::raw_string_ostream OS(Str); - dumpNode(OS, this, SM, /*IndentMask=*/{}); + dumpNode(OS, this, TM, /*IndentMask=*/{}); return std::move(OS.str()); } -std::string syntax::Node::dumpTokens(const SourceManager &SM) const { +std::string syntax::Node::dumpTokens(const TokenManager &TM) const { std::string Storage; llvm::raw_string_ostream OS(Storage); traverse(this, [&](const syntax::Node *N) { if (const auto *L = dyn_cast(N)) { - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << " "; } }); @@ -297,7 +267,8 @@ C.getRole() == NodeRole::ListDelimiter); if (C.getRole() == NodeRole::ListDelimiter) { assert(isa(C)); - assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); + // FIXME: re-enable it when there is way to retrieve token kind in Leaf. 
+ // assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); } } diff --git a/clang/tools/clang-check/ClangCheck.cpp b/clang/tools/clang-check/ClangCheck.cpp --- a/clang/tools/clang-check/ClangCheck.cpp +++ b/clang/tools/clang-check/ClangCheck.cpp @@ -25,6 +25,7 @@ #include "clang/StaticAnalyzer/Frontend/FrontendActions.h" #include "clang/Tooling/CommonOptionsParser.h" #include "clang/Tooling/Syntax/BuildTree.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "clang/Tooling/Tooling.h" @@ -157,9 +158,10 @@ clang::syntax::TokenBuffer TB = std::move(Collector).consume(); if (TokensDump) llvm::outs() << TB.dumpForTests(); - clang::syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), TB); - llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump( - AST.getSourceManager()); + clang::syntax::TokenBufferTokenManager TBTM(TB, AST.getLangOpts(), + AST.getSourceManager()); + clang::syntax::Arena A(TBTM); + llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(TBTM); } private: diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -26,7 +26,7 @@ auto ErrorOK = errorOK(Code); if (!ErrorOK) return ErrorOK; - auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. 
EXPECT_EQ(Tree.trim().str(), Actual); if (Actual != Tree.trim().str()) { @@ -59,7 +59,7 @@ auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); assert(AnnotatedNode); auto AnnotatedNodeDump = - StringRef(AnnotatedNode->dump(Arena->getSourceManager())) + StringRef(AnnotatedNode->dump(Arena->getTokenManager())) .trim() .str(); // EXPECT_EQ shows the diff between the two strings if they are different. diff --git a/clang/unittests/Tooling/Syntax/MutationsTest.cpp b/clang/unittests/Tooling/Syntax/MutationsTest.cpp --- a/clang/unittests/Tooling/Syntax/MutationsTest.cpp +++ b/clang/unittests/Tooling/Syntax/MutationsTest.cpp @@ -30,7 +30,7 @@ Transform(Source, Root); - auto Replacements = syntax::computeReplacements(*Arena, *Root); + auto Replacements = syntax::computeReplacements(*TM, *Root); auto Output = tooling::applyAllReplacements(Source.code(), Replacements); if (!Output) { ADD_FAILURE() << "could not apply replacements: " diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -27,7 +27,7 @@ return ::testing::AssertionFailure() << "Root was not built successfully."; - auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str(); auto Expected = Dump.trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. EXPECT_EQ(Expected, Actual); @@ -175,7 +175,7 @@ auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue); EXPECT_TRUE( - treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager()))); + treeDumpEqual(Copy, StatementContinue->dump(Arena->getTokenManager()))); // FIXME: Test that copy is independent of original, once the Mutations API is // more developed. 
} diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -112,7 +112,7 @@ createLeaf(*Arena, tok::r_paren)}; for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) { ASSERT_TRUE(Tree->findFirstLeaf() != nullptr); - EXPECT_EQ(Tree->findFirstLeaf()->getToken()->kind(), tok::l_paren); + EXPECT_EQ(TM->getToken(Tree->findFirstLeaf()->getTokenKey())->kind(), tok::l_paren); } } @@ -122,7 +122,7 @@ createLeaf(*Arena, tok::r_paren)}; for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) { ASSERT_TRUE(Tree->findLastLeaf() != nullptr); - EXPECT_EQ(Tree->findLastLeaf()->getToken()->kind(), tok::r_paren); + EXPECT_EQ(TM->getToken(Tree->findLastLeaf()->getTokenKey())->kind(), tok::r_paren); } } @@ -180,7 +180,7 @@ private: std::string dumpQuotedTokensOrNull(const Node *N) { return N ? "'" + - StringRef(N->dumpTokens(Arena->getSourceManager())) + StringRef(N->dumpTokens(Arena->getTokenManager())) .trim() .str() + "'" diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.h b/clang/unittests/Tooling/Syntax/TreeTestBase.h --- a/clang/unittests/Tooling/Syntax/TreeTestBase.h +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -17,6 +17,7 @@ #include "clang/Frontend/CompilerInvocation.h" #include "clang/Testing/TestClangConfig.h" #include "clang/Tooling/Syntax/Nodes.h" +#include "clang/Tooling/Syntax/TokenBufferTokenManager.h" #include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/StringRef.h" @@ -51,6 +52,7 @@ std::shared_ptr Invocation; // Set after calling buildTree(). 
std::unique_ptr TB; + std::unique_ptr TM; std::unique_ptr Arena; }; diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,14 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, + const TokenBufferTokenManager &STM) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) - return llvm::makeArrayRef(L->getToken(), 1); + return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), - T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), + STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -70,23 +71,26 @@ public: BuildSyntaxTree(syntax::TranslationUnit *&Root, std::unique_ptr &TB, + std::unique_ptr &TM, std::unique_ptr &Arena, std::unique_ptr Tokens) - : Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { + : Root(Root), TB(TB), TM(TM), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext &Ctx) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. 
- Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); + TM = std::make_unique( + *TB, Ctx.getLangOpts(), Ctx.getSourceManager()); + Arena = std::make_unique(*TM); Root = syntax::buildSyntaxTree(*Arena, Ctx); } private: syntax::TranslationUnit *&Root; std::unique_ptr &TB; + std::unique_ptr &TM; std::unique_ptr &Arena; std::unique_ptr Tokens; }; @@ -94,21 +98,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *&Root, + std::unique_ptr &TM, std::unique_ptr &TB, std::unique_ptr &Arena) - : Root(Root), TB(TB), Arena(Arena) {} + : Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TB, TM, Arena, std::move(Tokens)); } private: syntax::TranslationUnit *&Root; + std::unique_ptr &TM; std::unique_ptr &TB; std::unique_ptr &Arena; }; @@ -149,7 +155,7 @@ Compiler.setSourceManager(SourceMgr.get()); syntax::TranslationUnit *Root = nullptr; - BuildSyntaxTreeAction Recorder(Root, this->TB, this->Arena); + BuildSyntaxTreeAction Recorder(Root, this->TM, this->TB, this->Arena); // Action could not be executed but the frontend didn't identify any errors // in the code ==> problem in setting up the action. @@ -163,7 +169,7 @@ syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { - ArrayRef Toks = tokens(Root); + ArrayRef Toks = tokens(Root, *TM); if (Toks.front().location().isFileID() && Toks.back().location().isFileID() && syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==