diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -23,7 +23,6 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" -#include "clang/Tooling/Syntax/Tokens.h" #include "clang/Tooling/Syntax/Tree.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" diff --git a/clang/include/clang/Tooling/Syntax/TokenManager.h b/clang/include/clang/Tooling/Syntax/TokenManager.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Syntax/TokenManager.h @@ -0,0 +1,46 @@ +//===- TokenManager.h - Manage Tokens for syntax-tree ------------*- C++-*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Defines Token interfaces for the syntax-tree, decoupling the syntax-tree from +// the TokenBuffer. It enables producers (e.g. clang pseudoparser) to produce a +// syntax-tree with a different token implementation. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H +#define LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H + +#include "llvm/ADT/StringRef.h" + +namespace clang { +namespace syntax { + +/// Base token interfaces for the syntax-tree. +class TokenManager { +public: + /// Lets implementations be destroyed safely through a TokenManager pointer. + virtual ~TokenManager() = default; + + /// Describes what the exact class kind of the TokenManager is. + virtual llvm::StringLiteral kind() const = 0; + + /// A key to identify a specific token. The token concept depends on the + /// underlying implementation -- it can be a spelled token from the original + /// source file or an expanded token. + /// The syntax-tree Leaf node holds a Key.
+ using Key = const void *; + /// Gets the text of the token identified by the key. + virtual llvm::StringRef getText(Key K) const = 0; + + // FIXME: add an interface for getting token kind. +}; + +} // namespace syntax +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_SYNTAX_TOKEN_MANAGER_H diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -33,6 +33,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" @@ -457,6 +458,53 @@ CollectPPExpansions *Collector; }; +/// A TokenBuffer-powered token manager. +/// It tracks the underlying token buffers, source manager, etc. +class SyntaxTokenManager : public TokenManager { +public: + SyntaxTokenManager(SourceManager &SourceMgr, const LangOptions &LangOpts, + const TokenBuffer &Tokens) + : SM(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} + + static bool classof(const TokenManager *N) { return N->kind() == Kind; } + llvm::StringLiteral kind() const override { return Kind; } + + llvm::StringRef getText(Key I) const override { + const auto *Token = getToken(I); + assert(Token); + // Handle 'eof' separately, calling text() on it produces an empty string. + if (Token->kind() == tok::eof) + return ""; + + return Token->text(SM); + } + + const syntax::Token *getToken(Key I) const { + return reinterpret_cast(I); + } + SourceManager &getSourceManager() { return SM; } + const SourceManager &getSourceManager() const { return SM; } + const TokenBuffer &getTokenBuffer() const { return Tokens; } + +private: + // This manager is powered by the TokenBuffer.
+ static constexpr llvm::StringLiteral Kind = "TokenBuffer"; + + /// Add \p Buffer to the underlying source manager, tokenize it and store the + /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens + /// that were not written in user code. + std::pair> + lexBuffer(std::unique_ptr Buffer); + friend class FactoryImpl; + + SourceManager &SM; + const LangOptions &LangOpts; + const TokenBuffer &Tokens; + /// IDs and storage for additional tokenized files. + llvm::DenseMap> ExtraTokens; +}; + + } // namespace syntax } // namespace clang diff --git a/clang/include/clang/Tooling/Syntax/Tree.h b/clang/include/clang/Tooling/Syntax/Tree.h --- a/clang/include/clang/Tooling/Syntax/Tree.h +++ b/clang/include/clang/Tooling/Syntax/Tree.h @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // Defines the basic structure of the syntax tree. There are two kinds of nodes: -// - leaf nodes correspond to a token in the expanded token stream, +// - leaf nodes correspond to a token key in the token manager // - tree nodes correspond to language grammar constructs. // // The tree is initially built from an AST. Each node of a newly built tree @@ -21,11 +21,8 @@ #ifndef LLVM_CLANG_TOOLING_SYNTAX_TREE_H #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H -#include "clang/Basic/LangOptions.h" -#include "clang/Basic/SourceLocation.h" -#include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" -#include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/TokenManager.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/iterator.h" @@ -36,33 +33,18 @@ namespace clang { namespace syntax { -/// A memory arena for syntax trees. Also tracks the underlying token buffers, -/// source manager, etc. +/// A memory arena for syntax trees. 
class Arena { public: - Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens); - - const SourceManager &getSourceManager() const { return SourceMgr; } - const LangOptions &getLangOptions() const { return LangOpts; } - - const TokenBuffer &getTokenBuffer() const; + Arena(TokenManager &TokenMgr) : TokenMgr(TokenMgr) {} llvm::BumpPtrAllocator &getAllocator() { return Allocator; } -private: - /// Add \p Buffer to the underlying source manager, tokenize it and store the - /// resulting tokens. Used exclusively in `FactoryImpl` to materialize tokens - /// that were not written in user code. - std::pair> - lexBuffer(std::unique_ptr Buffer); - friend class FactoryImpl; + const TokenManager &getTokenManager() const { return TokenMgr; } + TokenManager &getTokenManager() { return TokenMgr; } private: - SourceManager &SourceMgr; - const LangOptions &LangOpts; - const TokenBuffer &Tokens; - /// IDs and storage for additional tokenized files. - llvm::DenseMap> ExtraTokens; + /// Manages all token-related state; referenced, not owned, by the arena. + TokenManager &TokenMgr; /// Keeps all the allocated nodes and their intermediate data structures. llvm::BumpPtrAllocator Allocator; }; @@ -122,9 +104,9 @@ Node *getPreviousSibling() { return PreviousSibling; } /// Dumps the structure of a subtree. For debugging and testing purposes. - std::string dump(const SourceManager &SM) const; + std::string dump(const TokenManager &TM) const; /// Dumps the tokens forming this subtree. - std::string dumpTokens(const SourceManager &SM) const; + std::string dumpTokens(const TokenManager &TM) const; /// Asserts invariants on this node of the tree and its immediate children. /// Will not recurse into the subtree. No-op if NDEBUG is set. @@ -153,16 +135,16 @@ unsigned CanModify : 1; }; -/// A leaf node points to a single token inside the expanded token stream. +/// A leaf node holds the key of a single token; the TokenManager resolves it.
class Leaf final : public Node { public: - Leaf(const Token *T); + Leaf(TokenManager::Key K); static bool classof(const Node *N); - const Token *getToken() const { return Tok; } + TokenManager::Key getTokenKey() const { return K; } private: - const Token *Tok; + TokenManager::Key K; }; /// A node that has children and represents a syntactic language construct. diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -365,21 +365,23 @@ /// Call finalize() to finish building the tree and consume the root node. class syntax::TreeBuilder { public: - TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) { - for (const auto &T : Arena.getTokenBuffer().expandedTokens()) + TreeBuilder(syntax::Arena &Arena) + : Arena(Arena), STM(cast(Arena.getTokenManager())), + Pending(Arena, STM) { + for (const auto &T : STM.getTokenBuffer().expandedTokens()) LocationToToken.insert({T.location(), &T}); } llvm::BumpPtrAllocator &allocator() { return Arena.getAllocator(); } const SourceManager &sourceManager() const { - return Arena.getSourceManager(); + return STM.getSourceManager(); } /// Populate children for \p New node, assuming it covers tokens from \p /// Range. 
void foldNode(ArrayRef Range, syntax::Tree *New, ASTPtr From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(STM, Range, New); if (From) Mapping.add(From, New); } @@ -392,7 +394,7 @@ void foldNode(llvm::ArrayRef Range, syntax::Tree *New, NestedNameSpecifierLoc From) { assert(New); - Pending.foldChildren(Arena, Range, New); + Pending.foldChildren(STM, Range, New); if (From) Mapping.add(From, New); } @@ -403,7 +405,7 @@ ASTPtr From) { assert(New); auto ListRange = Pending.shrinkToFitList(SuperRange); - Pending.foldChildren(Arena, ListRange, New); + Pending.foldChildren(STM, ListRange, New); if (From) Mapping.add(From, New); } @@ -434,12 +436,12 @@ /// Finish building the tree and consume the root node. syntax::TranslationUnit *finalize() && { - auto Tokens = Arena.getTokenBuffer().expandedTokens(); + auto Tokens = STM.getTokenBuffer().expandedTokens(); assert(!Tokens.empty()); assert(Tokens.back().kind() == tok::eof); // Build the root of the tree, consuming all the children. - Pending.foldChildren(Arena, Tokens.drop_back(), + Pending.foldChildren(STM, Tokens.drop_back(), new (Arena.getAllocator()) syntax::TranslationUnit); auto *TU = cast(std::move(Pending).finalize()); @@ -464,7 +466,7 @@ assert(First.isValid()); assert(Last.isValid()); assert(First == Last || - Arena.getSourceManager().isBeforeInTranslationUnit(First, Last)); + STM.getSourceManager().isBeforeInTranslationUnit(First, Last)); return llvm::makeArrayRef(findToken(First), std::next(findToken(Last))); } @@ -564,15 +566,15 @@ /// /// Ensures that added nodes properly nest and cover the whole token stream. 
struct Forest { - Forest(syntax::Arena &A) { - assert(!A.getTokenBuffer().expandedTokens().empty()); - assert(A.getTokenBuffer().expandedTokens().back().kind() == tok::eof); + Forest(syntax::Arena &A, const syntax::SyntaxTokenManager &STM) { + assert(!STM.getTokenBuffer().expandedTokens().empty()); + assert(STM.getTokenBuffer().expandedTokens().back().kind() == tok::eof); // Create all leaf nodes. // Note that we do not have 'eof' in the tree. - for (const auto &T : A.getTokenBuffer().expandedTokens().drop_back()) { + for (const auto &T : STM.getTokenBuffer().expandedTokens().drop_back()) { auto *L = new (A.getAllocator()) syntax::Leaf(&T); L->Original = true; - L->CanModify = A.getTokenBuffer().spelledForExpanded(T).has_value(); + L->CanModify = STM.getTokenBuffer().spelledForExpanded(T).has_value(); Trees.insert(Trees.end(), {&T, L}); } } @@ -620,7 +622,7 @@ } /// Add \p Node to the forest and attach child nodes based on \p Tokens. - void foldChildren(const syntax::Arena &A, ArrayRef Tokens, + void foldChildren(const syntax::SyntaxTokenManager &STM, ArrayRef Tokens, syntax::Tree *Node) { // Attach children to `Node`. assert(Node->getFirstChild() == nullptr && "node already has children"); @@ -646,7 +648,7 @@ // Mark that this node came from the AST and is backed by the source code. Node->Original = true; Node->CanModify = - A.getTokenBuffer().spelledForExpanded(Tokens).has_value(); + STM.getTokenBuffer().spelledForExpanded(Tokens).has_value(); Trees.erase(BeginChildren, EndChildren); Trees.insert({FirstToken, Node}); @@ -660,18 +662,20 @@ return Root; } - std::string str(const syntax::Arena &A) const { + std::string str(const syntax::SyntaxTokenManager &STM) const { std::string R; for (auto It = Trees.begin(); It != Trees.end(); ++It) { + // The last tree has no successor; its extent runs to the end of the + // expanded token stream, so `Trees.end()` must not be dereferenced. unsigned CoveredTokens = - It != Trees.end() + std::next(It) != Trees.end() ?
(std::next(It)->first - It->first) - : A.getTokenBuffer().expandedTokens().end() - It->first; + : STM.getTokenBuffer().expandedTokens().end() - It->first; R += std::string( formatv("- '{0}' covers '{1}'+{2} tokens\n", It->second->getKind(), - It->first->text(A.getSourceManager()), CoveredTokens)); - R += It->second->dump(A.getSourceManager()); + It->first->text(STM.getSourceManager()), CoveredTokens)); + R += It->second->dump(STM); } return R; } @@ -684,9 +688,10 @@ }; /// For debugging purposes. - std::string str() { return Pending.str(Arena); } + std::string str() { return Pending.str(STM); } syntax::Arena &Arena; + SyntaxTokenManager &STM; /// To quickly find tokens by their start location. llvm::DenseMap LocationToToken; Forest Pending; @@ -1718,7 +1723,7 @@ markExprChild(ChildExpr, NodeRole::Expression); ChildNode = new (allocator()) syntax::ExpressionStatement; // (!) 'getStmtRange()' ensures this covers a trailing semicolon. - Pending.foldChildren(Arena, getStmtRange(Child), ChildNode); + Pending.foldChildren(STM, getStmtRange(Child), ChildNode); } else { ChildNode = Mapping.find(Child); } diff --git a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp --- a/clang/lib/Tooling/Syntax/ComputeReplacements.cpp +++ b/clang/lib/Tooling/Syntax/ComputeReplacements.cpp @@ -8,6 +8,7 @@ #include "clang/Tooling/Core/Replacement.h" #include "clang/Tooling/Syntax/Mutations.h" #include "clang/Tooling/Syntax/Tokens.h" +#include "clang/Tooling/Syntax/Tree.h" #include "llvm/Support/Error.h" using namespace clang; @@ -16,10 +17,12 @@ using ProcessTokensFn = llvm::function_ref, bool /*IsOriginal*/)>; /// Enumerates spans of tokens from the tree consecutively laid out in memory.
-void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) { +void enumerateTokenSpans(const syntax::Tree *Root, + const syntax::SyntaxTokenManager &STM, + ProcessTokensFn Callback) { struct Enumerator { - Enumerator(ProcessTokensFn Callback) - : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), + Enumerator(const syntax::SyntaxTokenManager &STM, ProcessTokensFn Callback) + : STM(STM), SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false), Callback(Callback) {} void run(const syntax::Tree *Root) { @@ -39,7 +42,8 @@ } auto *L = cast(N); - if (SpanEnd == L->getToken() && SpanIsOriginal == L->isOriginal()) { + if (SpanEnd == STM.getToken(L->getTokenKey()) && + SpanIsOriginal == L->isOriginal()) { // Extend the current span. ++SpanEnd; return; @@ -48,24 +52,25 @@ if (SpanBegin) Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal); // Start recording a new span. - SpanBegin = L->getToken(); + SpanBegin = STM.getToken(L->getTokenKey()); SpanEnd = SpanBegin + 1; SpanIsOriginal = L->isOriginal(); } + const syntax::SyntaxTokenManager &STM; const syntax::Token *SpanBegin; const syntax::Token *SpanEnd; bool SpanIsOriginal; ProcessTokensFn Callback; }; - return Enumerator(Callback).run(Root); + return Enumerator(STM, Callback).run(Root); } -syntax::FileRange rangeOfExpanded(const syntax::Arena &A, +syntax::FileRange rangeOfExpanded(const syntax::SyntaxTokenManager &STM, llvm::ArrayRef Expanded) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &Buffer = STM.getTokenBuffer(); + const auto &SM = STM.getSourceManager(); // Check that \p Expanded actually points into expanded tokens. 
assert(Buffer.expandedTokens().begin() <= Expanded.begin()); @@ -85,8 +90,9 @@ tooling::Replacements syntax::computeReplacements(const syntax::Arena &A, const syntax::TranslationUnit &TU) { - const auto &Buffer = A.getTokenBuffer(); - const auto &SM = A.getSourceManager(); + const auto &STM = llvm::cast(A.getTokenManager()); + const auto &Buffer = STM.getTokenBuffer(); + const auto &SM = STM.getSourceManager(); tooling::Replacements Replacements; // Text inserted by the replacement we are building now. @@ -95,13 +101,12 @@ if (ReplacedRange.empty() && Replacement.empty()) return; llvm::cantFail(Replacements.add(tooling::Replacement( - SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement))); + SM, rangeOfExpanded(STM, ReplacedRange).toCharRange(SM), Replacement))); Replacement = ""; }; - const syntax::Token *NextOriginal = Buffer.expandedTokens().begin(); enumerateTokenSpans( - &TU, [&](llvm::ArrayRef Tokens, bool IsOriginal) { + &TU, STM, [&](llvm::ArrayRef Tokens, bool IsOriginal) { if (!IsOriginal) { Replacement += syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM); diff --git a/clang/lib/Tooling/Syntax/Synthesis.cpp b/clang/lib/Tooling/Syntax/Synthesis.cpp --- a/clang/lib/Tooling/Syntax/Synthesis.cpp +++ b/clang/lib/Tooling/Syntax/Synthesis.cpp @@ -8,6 +8,7 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Tooling/Syntax/BuildTree.h" #include "clang/Tooling/Syntax/Tree.h" +#include "clang/Tooling/Syntax/Tokens.h" using namespace clang; @@ -28,14 +29,15 @@ static std::pair> lexBuffer(syntax::Arena &A, std::unique_ptr Buffer) { - return A.lexBuffer(std::move(Buffer)); + auto& STM = llvm::cast(A.getTokenManager()); + return STM.lexBuffer(std::move(Buffer)); } }; // FIXME: `createLeaf` is based on `syntax::tokenize` internally, as such it // doesn't support digraphs or line continuations. 
syntax::Leaf *clang::syntax::createLeaf(syntax::Arena &A, tok::TokenKind K, - StringRef Spelling) { + StringRef Spelling) { auto Tokens = FactoryImpl::lexBuffer(A, llvm::MemoryBuffer::getMemBufferCopy(Spelling)) .second; @@ -209,11 +211,12 @@ syntax::Node *clang::syntax::deepCopyExpandingMacros(syntax::Arena &A, const syntax::Node *N) { + const auto& STM = llvm::cast(A.getTokenManager()); if (const auto *L = dyn_cast(N)) - // `L->getToken()` gives us the expanded token, thus we implicitly expand + // `STM.getToken()` gives us the expanded token, thus we implicitly expand // any macros here. - return createLeaf(A, L->getToken()->kind(), - L->getToken()->text(A.getSourceManager())); + return createLeaf(A, STM.getToken(L->getTokenKey())->kind(), + STM.getText(L->getTokenKey())); const auto *T = cast(N); std::vector> Children; diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -929,3 +929,13 @@ } return Dump; } + +constexpr llvm::StringLiteral syntax::SyntaxTokenManager::Kind; + +std::pair> +syntax::SyntaxTokenManager::lexBuffer(std::unique_ptr Input) { + auto FID = SM.createFileID(std::move(Input)); + auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SM,LangOpts)); + assert(It.second && "duplicate FileID"); + return {FID, It.first->second}; +} diff --git a/clang/lib/Tooling/Syntax/Tree.cpp b/clang/lib/Tooling/Syntax/Tree.cpp --- a/clang/lib/Tooling/Syntax/Tree.cpp +++ b/clang/lib/Tooling/Syntax/Tree.cpp @@ -33,25 +33,7 @@ } } // namespace -syntax::Arena::Arena(SourceManager &SourceMgr, const LangOptions &LangOpts, - const TokenBuffer &Tokens) - : SourceMgr(SourceMgr), LangOpts(LangOpts), Tokens(Tokens) {} - -const syntax::TokenBuffer &syntax::Arena::getTokenBuffer() const { - return Tokens; -} - -std::pair> -syntax::Arena::lexBuffer(std::unique_ptr Input) { - auto FID = SourceMgr.createFileID(std::move(Input)); - auto It = ExtraTokens.try_emplace(FID, tokenize(FID, SourceMgr, LangOpts)); - assert(It.second && "duplicate
FileID"); - return {FID, It.first->second}; -} - -syntax::Leaf::Leaf(const syntax::Token *Tok) : Node(NodeKind::Leaf), Tok(Tok) { - assert(Tok != nullptr); -} +syntax::Leaf::Leaf(syntax::TokenManager::Key K) : Node(NodeKind::Leaf), K(K) {} syntax::Node::Node(NodeKind Kind) : Parent(nullptr), NextSibling(nullptr), PreviousSibling(nullptr), @@ -190,20 +172,8 @@ } namespace { -static void dumpLeaf(raw_ostream &OS, const syntax::Leaf *L, - const SourceManager &SM) { - assert(L); - const auto *Token = L->getToken(); - assert(Token); - // Handle 'eof' separately, calling text() on it produces an empty string. - if (Token->kind() == tok::eof) - OS << ""; - else - OS << Token->text(SM); -} - static void dumpNode(raw_ostream &OS, const syntax::Node *N, - const SourceManager &SM, llvm::BitVector IndentMask) { + const syntax::TokenManager &TM, llvm::BitVector IndentMask) { auto DumpExtraInfo = [&OS](const syntax::Node *N) { if (N->getRole() != syntax::NodeRole::Unknown) OS << " " << N->getRole(); @@ -216,7 +186,7 @@ assert(N); if (const auto *L = dyn_cast(N)) { OS << "'"; - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); OS << "'"; DumpExtraInfo(N); OS << "\n"; @@ -242,25 +212,25 @@ OS << "|-"; IndentMask.push_back(true); } - dumpNode(OS, &It, SM, IndentMask); + dumpNode(OS, &It, TM, IndentMask); IndentMask.pop_back(); } } } // namespace -std::string syntax::Node::dump(const SourceManager &SM) const { +std::string syntax::Node::dump(const TokenManager &TM) const { std::string Str; llvm::raw_string_ostream OS(Str); - dumpNode(OS, this, SM, /*IndentMask=*/{}); + dumpNode(OS, this, TM, /*IndentMask=*/{}); return std::move(OS.str()); } -std::string syntax::Node::dumpTokens(const SourceManager &SM) const { +std::string syntax::Node::dumpTokens(const TokenManager &TM) const { std::string Storage; llvm::raw_string_ostream OS(Storage); traverse(this, [&](const syntax::Node *N) { if (const auto *L = dyn_cast(N)) { - dumpLeaf(OS, L, SM); + OS << TM.getText(L->getTokenKey()); 
OS << " "; } }); @@ -297,7 +267,8 @@ C.getRole() == NodeRole::ListDelimiter); if (C.getRole() == NodeRole::ListDelimiter) { assert(isa(C)); - assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); + // FIXME: can be fixed by adding a tok::TokenKind in the Leaf node. + // assert(cast(C).getToken()->kind() == L->getDelimiterTokenKind()); } } diff --git a/clang/tools/clang-check/ClangCheck.cpp b/clang/tools/clang-check/ClangCheck.cpp --- a/clang/tools/clang-check/ClangCheck.cpp +++ b/clang/tools/clang-check/ClangCheck.cpp @@ -157,9 +157,10 @@ clang::syntax::TokenBuffer TB = std::move(Collector).consume(); if (TokensDump) llvm::outs() << TB.dumpForTests(); - clang::syntax::Arena A(AST.getSourceManager(), AST.getLangOpts(), TB); - llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump( - AST.getSourceManager()); + clang::syntax::SyntaxTokenManager TM(AST.getSourceManager(), + AST.getLangOpts(), TB); + clang::syntax::Arena A(TM); + llvm::outs() << clang::syntax::buildSyntaxTree(A, AST)->dump(TM); } private: diff --git a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp --- a/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/BuildTreeTest.cpp @@ -26,7 +26,7 @@ auto ErrorOK = errorOK(Code); if (!ErrorOK) return ErrorOK; - auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. EXPECT_EQ(Tree.trim().str(), Actual); if (Actual != Tree.trim().str()) { @@ -59,7 +59,7 @@ auto *AnnotatedNode = nodeByRange(AnnotatedRanges[i], Root); assert(AnnotatedNode); auto AnnotatedNodeDump = - StringRef(AnnotatedNode->dump(Arena->getSourceManager())) + StringRef(AnnotatedNode->dump(Arena->getTokenManager())) .trim() .str(); // EXPECT_EQ shows the diff between the two strings if they are different.
diff --git a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp --- a/clang/unittests/Tooling/Syntax/SynthesisTest.cpp +++ b/clang/unittests/Tooling/Syntax/SynthesisTest.cpp @@ -27,7 +27,7 @@ return ::testing::AssertionFailure() << "Root was not built successfully."; - auto Actual = StringRef(Root->dump(Arena->getSourceManager())).trim().str(); + auto Actual = StringRef(Root->dump(Arena->getTokenManager())).trim().str(); auto Expected = Dump.trim().str(); // EXPECT_EQ shows the diff between the two strings if they are different. EXPECT_EQ(Expected, Actual); @@ -175,7 +175,7 @@ auto *Copy = deepCopyExpandingMacros(*Arena, StatementContinue); EXPECT_TRUE( - treeDumpEqual(Copy, StatementContinue->dump(Arena->getSourceManager()))); + treeDumpEqual(Copy, StatementContinue->dump(Arena->getTokenManager()))); // FIXME: Test that copy is independent of original, once the Mutations API is // more developed. } diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -112,7 +112,7 @@ createLeaf(*Arena, tok::r_paren)}; for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) { ASSERT_TRUE(Tree->findFirstLeaf() != nullptr); - EXPECT_EQ(Tree->findFirstLeaf()->getToken()->kind(), tok::l_paren); + EXPECT_EQ(TM->getToken(Tree->findFirstLeaf()->getTokenKey())->kind(), tok::l_paren); } } @@ -122,7 +122,7 @@ createLeaf(*Arena, tok::r_paren)}; for (const auto *Tree : generateAllTreesWithShape(Leafs, {3u})) { ASSERT_TRUE(Tree->findLastLeaf() != nullptr); - EXPECT_EQ(Tree->findLastLeaf()->getToken()->kind(), tok::r_paren); + EXPECT_EQ(TM->getToken(Tree->findLastLeaf()->getTokenKey())->kind(), tok::r_paren); } } @@ -180,7 +180,7 @@ private: std::string dumpQuotedTokensOrNull(const Node *N) { return N ? 
"'" + - StringRef(N->dumpTokens(Arena->getSourceManager())) + StringRef(N->dumpTokens(Arena->getTokenManager())) .trim() .str() + "'" diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.h b/clang/unittests/Tooling/Syntax/TreeTestBase.h --- a/clang/unittests/Tooling/Syntax/TreeTestBase.h +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.h @@ -50,6 +50,7 @@ new SourceManager(*Diags, *FileMgr); std::shared_ptr Invocation; // Set after calling buildTree(). + std::unique_ptr TM; std::unique_ptr TB; std::unique_ptr Arena; }; diff --git a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp --- a/clang/unittests/Tooling/Syntax/TreeTestBase.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTestBase.cpp @@ -35,13 +35,13 @@ using namespace clang::syntax; namespace { -ArrayRef tokens(syntax::Node *N) { +ArrayRef tokens(syntax::Node *N, const SyntaxTokenManager &STM) { assert(N->isOriginal() && "tokens of modified nodes are not well-defined"); if (auto *L = dyn_cast(N)) - return llvm::makeArrayRef(L->getToken(), 1); + return llvm::makeArrayRef(STM.getToken(L->getTokenKey()), 1); auto *T = cast(N); - return llvm::makeArrayRef(T->findFirstLeaf()->getToken(), - T->findLastLeaf()->getToken() + 1); + return llvm::makeArrayRef(STM.getToken(T->findFirstLeaf()->getTokenKey()), + STM.getToken(T->findLastLeaf()->getTokenKey()) + 1); } } // namespace @@ -69,23 +69,26 @@ class BuildSyntaxTree : public ASTConsumer { public: BuildSyntaxTree(syntax::TranslationUnit *&Root, + std::unique_ptr &TM, std::unique_ptr &TB, std::unique_ptr &Arena, std::unique_ptr Tokens) - : Root(Root), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { + : Root(Root), TM(TM), TB(TB), Arena(Arena), Tokens(std::move(Tokens)) { assert(this->Tokens); } void HandleTranslationUnit(ASTContext &Ctx) override { TB = std::make_unique(std::move(*Tokens).consume()); Tokens = nullptr; // make sure we fail if this gets called twice. 
- Arena = std::make_unique(Ctx.getSourceManager(), - Ctx.getLangOpts(), *TB); + TM = std::make_unique(Ctx.getSourceManager(), + Ctx.getLangOpts(), *TB); + Arena = std::make_unique(*TM); Root = syntax::buildSyntaxTree(*Arena, Ctx); } private: syntax::TranslationUnit *&Root; + std::unique_ptr &TM; std::unique_ptr &TB; std::unique_ptr &Arena; std::unique_ptr Tokens; @@ -94,21 +97,23 @@ class BuildSyntaxTreeAction : public ASTFrontendAction { public: BuildSyntaxTreeAction(syntax::TranslationUnit *&Root, + std::unique_ptr &TM, std::unique_ptr &TB, std::unique_ptr &Arena) - : Root(Root), TB(TB), Arena(Arena) {} + : Root(Root), TM(TM), TB(TB), Arena(Arena) {} std::unique_ptr CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override { // We start recording the tokens, ast consumer will take on the result. auto Tokens = std::make_unique(CI.getPreprocessor()); - return std::make_unique(Root, TB, Arena, + return std::make_unique(Root, TM, TB, Arena, std::move(Tokens)); } private: syntax::TranslationUnit *&Root; + std::unique_ptr &TM; std::unique_ptr &TB; std::unique_ptr &Arena; }; @@ -149,7 +154,7 @@ Compiler.setSourceManager(SourceMgr.get()); syntax::TranslationUnit *Root = nullptr; - BuildSyntaxTreeAction Recorder(Root, this->TB, this->Arena); + BuildSyntaxTreeAction Recorder(Root, this->TM, this->TB, this->Arena); // Action could not be executed but the frontend didn't identify any errors // in the code ==> problem in setting up the action. @@ -163,7 +168,7 @@ syntax::Node *SyntaxTreeTest::nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) { - ArrayRef Toks = tokens(Root); + ArrayRef Toks = tokens(Root, *TM); if (Toks.front().location().isFileID() && Toks.back().location().isFileID() && syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==