diff --git a/clang-tools-extra/clangd/ParsedAST.cpp b/clang-tools-extra/clangd/ParsedAST.cpp --- a/clang-tools-extra/clangd/ParsedAST.cpp +++ b/clang-tools-extra/clangd/ParsedAST.cpp @@ -423,6 +423,8 @@ // tokens from running the preprocessor inside the checks (only // modernize-use-trailing-return-type does that today). syntax::TokenBuffer Tokens = std::move(CollectTokens).consume(); + // Makes SelectionTree build much faster. + Tokens.indexExpandedTokens(); std::vector ParsedDecls = Action->takeTopLevelDecls(); // AST traversals should exclude the preamble, to avoid performance cliffs. Clang->getASTContext().setTraversalScope(ParsedDecls); diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -34,6 +34,7 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Token.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Compiler.h" @@ -192,8 +193,13 @@ return ExpandedTokens; } + /// Builds a cache to make future calls to expandedToken(SourceRange) faster. + /// Creates an index only once. Further calls to it will be no-op. + void indexExpandedTokens(); + /// Returns the subrange of expandedTokens() corresponding to the closed /// token range R. + /// Consider calling indexExpandedTokens() before for faster lookups. llvm::ArrayRef expandedTokens(SourceRange R) const; /// Returns the subrange of spelled tokens corresponding to AST node spanning @@ -366,6 +372,8 @@ /// same stream as 'clang -E' (excluding the preprocessor directives like /// #file, etc.). std::vector ExpandedTokens; + // Index of ExpandedTokens for faster lookups by SourceLocation. + llvm::DenseMap ExpandedTokIndex; llvm::DenseMap Files; // The value is never null, pointer instead of reference to avoid disabling // implicit assignment operator. diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -183,7 +183,31 @@ return Text.substr(Begin, length()); } +void TokenBuffer::indexExpandedTokens() { + // No-op if the index is already created. + if (!ExpandedTokIndex.empty()) + return; + ExpandedTokIndex.reserve(ExpandedTokens.size()); + // Index ExpandedTokens for faster lookups by SourceLocation. + for (size_t I = 0, E = ExpandedTokens.size(); I != E; ++I) + ExpandedTokIndex[ExpandedTokens[I].location()] = I; +} + llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { + if (!ExpandedTokIndex.empty()) { + // Quick lookup if `R` is a token range. + // This is a huge win since majority of the users use ranges provided by an + // AST. Ranges in AST are token ranges from expanded token stream. + const auto B = ExpandedTokIndex.find(R.getBegin()); + const auto E = ExpandedTokIndex.find(R.getEnd()); + if (B != ExpandedTokIndex.end() && E != ExpandedTokIndex.end()) { + // Add 1 to End to make a half-open range. + return {ExpandedTokens.data() + B->getSecond(), + ExpandedTokens.data() + E->getSecond() + 1}; + } + } + // Slow case. Use `isBeforeInTranslationUnit` to binary search for the + // required range. return getTokensCovering(expandedTokens(), R, *SourceMgr); } diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -106,6 +106,7 @@ void EndSourceFileAction() override { assert(Collector && "BeginSourceFileAction was never called"); Result = std::move(*Collector).consume(); + Result.indexExpandedTokens(); } std::unique_ptr