diff --git a/clang-tools-extra/clangd/index/Ref.h b/clang-tools-extra/clangd/index/Ref.h --- a/clang-tools-extra/clangd/index/Ref.h +++ b/clang-tools-extra/clangd/index/Ref.h @@ -27,10 +27,43 @@ /// This is a bitfield which can be combined from different kinds. enum class RefKind : uint8_t { Unknown = 0, - Declaration = static_cast(index::SymbolRole::Declaration), - Definition = static_cast(index::SymbolRole::Definition), - Reference = static_cast(index::SymbolRole::Reference), - All = Declaration | Definition | Reference, + // Points to symbol declaration. Example: + // + // class Foo {}; + // ^ Foo declaration + // Foo foo; + // ^ this does not reference Foo declaration + Declaration = 1 << 0, + // Points to symbol definition. Example: + // + // int foo(); + // ^ references foo declaration, but not foo definition + // int foo() { return 42; } + // ^ references foo definition, but not declaration + // bool bar() { return true; } + // ^ references both definition and declaration + Definition = 1 << 1, + // Points to symbol reference. Example: + // + // int Foo = 42; + // int Bar = Foo + 1; + // ^ this is a reference to Foo + Reference = 1 << 2, + // The reference explicitly spells out declaration's name. Such references can + // not come from macro expansions or implicit AST nodes. + // + // class Foo {}; + // ^ references declaration, definition and explicitly spells out name + // #define MACRO Foo + // + // Foo foo; + // ^ this reference explicitly spells out Foo's name + // struct Bar { + // MACRO Internal; + // ^ this references Foo, but does not explicitly spell out its name + // }; + Spelled = 1 << 3, + All = Declaration | Definition | Reference | Spelled, }; inline RefKind operator|(RefKind L, RefKind R) { @@ -72,6 +105,7 @@ using value_type = std::pair>; using const_iterator = std::vector::const_iterator; using iterator = const_iterator; + using size_type = size_t; RefSlab() = default; RefSlab(RefSlab &&Slab) = default; @@ -80,11 +114,11 @@ const_iterator begin() const { return Refs.begin(); } const_iterator end() const { return Refs.end(); } /// Gets the number of symbols. - size_t size() const { return Refs.size(); } - size_t numRefs() const { return NumRefs; } + size_type size() const { return Refs.size(); } + size_type numRefs() const { return NumRefs; } bool empty() const { return Refs.empty(); } - size_t bytes() const { + size_type bytes() const { return sizeof(*this) + Arena.getTotalMemory() + sizeof(value_type) * Refs.capacity(); } @@ -106,13 +140,13 @@ private: RefSlab(std::vector Refs, llvm::BumpPtrAllocator Arena, - size_t NumRefs) + size_type NumRefs) : Arena(std::move(Arena)), Refs(std::move(Refs)), NumRefs(NumRefs) {} llvm::BumpPtrAllocator Arena; std::vector Refs; /// Number of all references. - size_t NumRefs = 0; + size_type NumRefs = 0; }; } // namespace clangd diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h --- a/clang-tools-extra/clangd/index/SymbolCollector.h +++ b/clang-tools-extra/clangd/index/SymbolCollector.h @@ -19,6 +19,7 @@ #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Sema/CodeCompleteConsumer.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Support/Regex.h" #include @@ -85,6 +86,9 @@ /// If this is set, only collect symbols/references from a file if /// `FileFilter(SM, FID)` is true. If not set, all files are indexed. std::function FileFilter = nullptr; + /// If set to true, SymbolCollector will return only references that + /// explicitly spell out symbol's name. + bool DropImplicitReferences = false; }; SymbolCollector(Options Opts); diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -28,6 +28,7 @@ #include "clang/Index/IndexingAction.h" #include "clang/Index/USRGeneration.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Tooling/Syntax/Tokens.h" #include "llvm/Support/Casting.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" @@ -180,7 +181,16 @@ } RefKind toRefKind(index::SymbolRoleSet Roles) { - return static_cast(static_cast(RefKind::All) & Roles); + RefKind Result = RefKind::Unknown; + if (Roles & static_cast(index::SymbolRole::Declaration)) + Result |= RefKind::Declaration; + if (Roles & static_cast(index::SymbolRole::Definition)) + Result |= RefKind::Definition; + if (Roles & static_cast(index::SymbolRole::Reference)) + Result |= RefKind::Reference; + if (!(Roles & static_cast(index::SymbolRole::Implicit))) + Result |= RefKind::Spelled; + return Result; } bool shouldIndexRelation(const index::SymbolRelation &R) { @@ -291,7 +301,7 @@ // occurrence inside the base-specifier. processRelations(*ND, *ID, Relations); - bool CollectRef = static_cast(Opts.RefFilter) & Roles; + bool CollectRef = static_cast(Opts.RefFilter & toRefKind(Roles)); bool IsOnlyRef = !(Roles & (static_cast(index::SymbolRole::Declaration) | static_cast(index::SymbolRole::Definition))); @@ -568,6 +578,9 @@ R.Location.End = Range.second; R.Location.FileURI = FileURI->c_str(); R.Kind = toRefKind(LocAndRole.second); + if (Opts.DropImplicitReferences && + !static_cast(R.Kind & RefKind::Spelled)) + return; Refs.insert(ID, R); } }; @@ -577,11 +590,30 @@ CollectRef(IDAndRefs.first, LocAndRole); } // Populate Refs slab from DeclRefs. - if (auto MainFileURI = GetURI(SM.getMainFileID())) { - for (const auto &It : DeclRefs) { - if (auto ID = getSymbolID(It.first)) { - for (const auto &LocAndRole : It.second) - CollectRef(*ID, LocAndRole); + llvm::DenseMap> FilesToTokensCache; + for (auto &DeclAndRef : DeclRefs) { + if (auto ID = getSymbolID(DeclAndRef.first)) { + for (auto &LocAndRole : DeclAndRef.second) { + const auto FileID = SM.getFileID(LocAndRole.first); + // FIXME: It's better to use TokenBuffer by passing spelled tokens from + // the caller of SymbolCollector. + if (!FilesToTokensCache.count(FileID)) + FilesToTokensCache[FileID] = + syntax::tokenize(FileID, SM, ASTCtx->getLangOpts()); + llvm::ArrayRef Tokens = FilesToTokensCache[FileID]; + // Check if the referenced symbol is spelled exactly the same way the + // corresponding NamedDecl is. If it isn't, mark this reference as + // implicit. An example of implicit reference (implicit = !spelled) + // would be a macro expansion. + const auto *IdentifierToken = + spelledIdentifierTouching(LocAndRole.first, Tokens); + DeclarationName Name = DeclAndRef.first->getDeclName(); + if (!IdentifierToken || + (Name.isIdentifier() && + Name.getAsString() != IdentifierToken->text(SM))) + LocAndRole.second |= + static_cast(index::SymbolRole::Implicit); + CollectRef(*ID, LocAndRole); } } } diff --git a/clang-tools-extra/clangd/refactor/Rename.cpp b/clang-tools-extra/clangd/refactor/Rename.cpp --- a/clang-tools-extra/clangd/refactor/Rename.cpp +++ b/clang-tools-extra/clangd/refactor/Rename.cpp @@ -296,6 +296,8 @@ bool HasMore = Index.refs(RQuest, [&](const Ref &R) { if (AffectedFiles.size() > MaxLimitFiles) return; + if (!static_cast(R.Kind & RefKind::Spelled)) + return; if (auto RefFilePath = filePath(R.Location, /*HintFilePath=*/MainFile)) { if (*RefFilePath != MainFile) AffectedFiles[*RefFilePath].push_back(toRange(R.Location)); diff --git a/clang-tools-extra/clangd/unittests/RenameTests.cpp b/clang-tools-extra/clangd/unittests/RenameTests.cpp --- a/clang-tools-extra/clangd/unittests/RenameTests.cpp +++ b/clang-tools-extra/clangd/unittests/RenameTests.cpp @@ -33,7 +33,7 @@ // Convert a Range to a Ref. Ref refWithRange(const clangd::Range &Range, const std::string &URI) { Ref Result; - Result.Kind = RefKind::Reference; + Result.Kind = RefKind::Reference | RefKind::Spelled; Result.Location.Start.setLine(Range.start.line); Result.Location.Start.setColumn(Range.start.character); Result.Location.End.setLine(Range.end.line); @@ -837,7 +837,7 @@ { // variables. R"cpp( - static const int [[VA^R]] = 123; + static const int [[VA^R]] = 123; )cpp", R"cpp( #include "foo.h" diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -42,6 +42,7 @@ using ::testing::IsEmpty; using ::testing::Not; using ::testing::Pair; +using ::testing::SizeIs; using ::testing::UnorderedElementsAre; using ::testing::UnorderedElementsAreArray; @@ -700,6 +701,40 @@ EXPECT_THAT(Refs, IsEmpty()); } +TEST_F(SymbolCollectorTest, SpelledReference) { + Annotations Header(R"cpp( + class Foo {}; + #define MACRO Foo + )cpp"); + Annotations Main(R"cpp( + $spelled[[Foo]] Variable1; + $macro[[MACRO]] Variable2; + )cpp"); + CollectorOpts.RefFilter = RefKind::All; + CollectorOpts.DropImplicitReferences = true; + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Foo").ID, + HaveRanges(Main.ranges("spelled"))))); + EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Main.ranges("macro"))))); + EXPECT_THAT(Refs, SizeIs(2)); +} + +TEST_F(SymbolCollectorTest, ImplicitReference) { + Annotations Header(R"cpp( + class Foo {}; + #define MACRO Foo + )cpp"); + Annotations Main(R"cpp( + $implicit[[MACRO]] Variable2; + )cpp"); + CollectorOpts.RefFilter = RefKind::All; + runSymbolCollector(Header.code(), Main.code()); + EXPECT_THAT(Refs, Contains(Pair(findSymbol(Symbols, "Foo").ID, + HaveRanges(Main.ranges("implicit"))))); + EXPECT_THAT(Refs, Contains(Pair(_, HaveRanges(Main.ranges("implicit"))))); + EXPECT_THAT(Refs, SizeIs(2)); +} + TEST_F(SymbolCollectorTest, NameReferences) { CollectorOpts.RefFilter = RefKind::All; CollectorOpts.RefsInHeaders = true; diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -313,11 +313,22 @@ const SourceManager *SourceMgr; }; +/// The spelled tokens that overlap or touch a spelling location Loc. +/// This always returns 0-2 tokens. +llvm::ArrayRef +spelledTokensTouching(SourceLocation Loc, llvm::ArrayRef Tokens); + /// The spelled tokens that overlap or touch a spelling location Loc. /// This always returns 0-2 tokens. llvm::ArrayRef spelledTokensTouching(SourceLocation Loc, const syntax::TokenBuffer &Tokens); +/// The identifier token that overlaps or touches a spelling location Loc. +/// If there is none, returns nullptr. +const syntax::Token * +spelledIdentifierTouching(SourceLocation Loc, + llvm::ArrayRef Tokens); + /// The identifier token that overlaps or touches a spelling location Loc. /// If there is none, returns nullptr. const syntax::Token * diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -256,21 +256,27 @@ llvm::ArrayRef syntax::spelledTokensTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef Tokens) { assert(Loc.isFileID()); - llvm::ArrayRef All = - Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc)); auto *Right = llvm::partition_point( - All, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); - bool AcceptRight = Right != All.end() && Right->location() <= Loc; - bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc; + Tokens, [&](const syntax::Token &Tok) { return Tok.location() < Loc; }); + bool AcceptRight = Right != Tokens.end() && Right->location() <= Loc; + bool AcceptLeft = + Right != Tokens.begin() && (Right - 1)->endLocation() >= Loc; return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0), Right + (AcceptRight ? 1 : 0)); } +llvm::ArrayRef +syntax::spelledTokensTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledTokensTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + const syntax::Token * syntax::spelledIdentifierTouching(SourceLocation Loc, - const syntax::TokenBuffer &Tokens) { + llvm::ArrayRef Tokens) { for (const syntax::Token &Tok : spelledTokensTouching(Loc, Tokens)) { if (Tok.kind() == tok::identifier) return &Tok; @@ -278,6 +284,13 @@ return nullptr; } +const syntax::Token * +syntax::spelledIdentifierTouching(SourceLocation Loc, + const syntax::TokenBuffer &Tokens) { + return spelledIdentifierTouching( + Loc, Tokens.spelledTokens(Tokens.sourceManager().getFileID(Loc))); +} + std::vector TokenBuffer::macroExpansions(FileID FID) const { auto FileIt = Files.find(FID);