diff --git a/clang-tools-extra/clangd/FindSymbols.h b/clang-tools-extra/clangd/FindSymbols.h --- a/clang-tools-extra/clangd/FindSymbols.h +++ b/clang-tools-extra/clangd/FindSymbols.h @@ -21,9 +21,13 @@ class ParsedAST; class SymbolIndex; +/// Helper function for deriving an LSP Location from an index SymbolLocation. +llvm::Expected indexToLSPLocation(const SymbolLocation &Loc, + llvm::StringRef TUPath); + /// Helper function for deriving an LSP Location for a Symbol. llvm::Expected symbolToLocation(const Symbol &Sym, - llvm::StringRef HintPath); + llvm::StringRef TUPath); /// Searches for the symbols matching \p Query. The syntax of \p Query can be /// the non-qualified name or fully qualified of a symbol. For example, diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -18,6 +18,7 @@ #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Index/IndexingAction.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" @@ -39,30 +40,33 @@ } // namespace -llvm::Expected symbolToLocation(const Symbol &Sym, - llvm::StringRef HintPath) { - // Prefer the definition over e.g. a function declaration in a header - auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration; - auto Path = URI::resolve(CD.FileURI, HintPath); +llvm::Expected indexToLSPLocation(const SymbolLocation &Loc, + llvm::StringRef TUPath) { + auto Path = URI::resolve(Loc.FileURI, TUPath); if (!Path) { return llvm::make_error( - formatv("Could not resolve path for symbol '{0}': {1}", - Sym.Name, llvm::toString(Path.takeError())), + llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI, + llvm::toString(Path.takeError())), llvm::inconvertibleErrorCode()); } Location L; - // Use HintPath as TUPath since there is no TU associated with this - // request. - L.uri = URIForFile::canonicalize(*Path, HintPath); + L.uri = URIForFile::canonicalize(*Path, TUPath); Position Start, End; - Start.line = CD.Start.line(); - Start.character = CD.Start.column(); - End.line = CD.End.line(); - End.character = CD.End.column(); + Start.line = Loc.Start.line(); + Start.character = Loc.Start.column(); + End.line = Loc.End.line(); + End.character = Loc.End.column(); L.range = {Start, End}; return L; } +llvm::Expected symbolToLocation(const Symbol &Sym, + llvm::StringRef TUPath) { + // Prefer the definition over e.g. a function declaration in a header + return indexToLSPLocation( + Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, TUPath); +} + llvm::Expected> getWorkspaceSymbols(llvm::StringRef Query, int Limit, const SymbolIndex *const Index, llvm::StringRef HintPath) { diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h --- a/clang-tools-extra/clangd/XRefs.h +++ b/clang-tools-extra/clangd/XRefs.h @@ -49,6 +49,21 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index = nullptr); +// Tries to provide a textual fallback for locating a symbol referenced at +// a location, by looking up the word under the cursor as a symbol name in the +// index. The aim is to pick up references to symbols in contexts where +// AST-based resolution does not work, such as comments, strings, and PP +// disabled regions. The implementation takes a number of measures to avoid +// false positives, such as looking for some signal that the word at the +// given location is likely to be an identifier. The function does not +// currently return results for locations that end up as real expanded +// tokens, although this may be relaxed for e.g. dependent code in the future. +// (This is for internal use by locateSymbolAt, and is exposed for testing). +std::vector +locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index, + SourceLocation Loc, + const std::string &MainFilePath); + /// Get all document links std::vector getDocumentLinks(ParsedAST &AST); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -13,6 +13,7 @@ #include "Logger.h" #include "ParsedAST.h" #include "Protocol.h" +#include "Quality.h" #include "Selection.h" #include "SourceCode.h" #include "URI.h" @@ -28,6 +29,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/Type.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" @@ -173,12 +175,10 @@ return L; } -} // namespace - // Treat #included files as symbols, to enable go-to-definition on them. -static llvm::Optional -locateFileReferent(const Position &Pos, ParsedAST &AST, - llvm::StringRef MainFilePath) { +llvm::Optional locateFileReferent(const Position &Pos, + ParsedAST &AST, + llvm::StringRef MainFilePath) { for (auto &Inc : AST.getIncludeStructure().MainFileIncludes) { if (!Inc.Resolved.empty() && Inc.R.start.line == Pos.line) { LocatedSymbol File; @@ -195,7 +195,7 @@ // Macros are simple: there's no declaration/definition distinction. // As a consequence, there's no need to look them up in the index either. -static llvm::Optional +llvm::Optional locateMacroReferent(const syntax::Token &TouchedIdentifier, ParsedAST &AST, llvm::StringRef MainFilePath) { if (auto M = locateMacroAt(TouchedIdentifier, AST.getPreprocessor())) { @@ -215,7 +215,7 @@ // The AST contains at least a declaration, maybe a definition. // These are up-to-date, and so generally preferred over index results. // We perform a single batch index lookup to find additional definitions. -static std::vector +std::vector locateASTReferent(SourceLocation CurLoc, const syntax::Token *TouchedIdentifier, ParsedAST &AST, llvm::StringRef MainFilePath, const SymbolIndex *Index) { @@ -316,6 +316,164 @@ return Result; } +llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) { + unsigned B = Offset, E = Offset; + while (B > 0 && isIdentifierBody(Code[B - 1])) + --B; + while (E < Code.size() && isIdentifierBody(Code[E])) + ++E; + return Code.slice(B, E); +} + +bool isLikelyToBeIdentifier(StringRef Word) { + // Word contains underscore. + // This handles things like snake_case and MACRO_CASE. + if (Word.contains('_')) { + return true; + } + // Word contains capital letter other than at beginning. + // This handles things like lowerCamel and UpperCamel. + // The check for also containing a lowercase letter is to rule out + // initialisms like "HTTP". + bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos; + bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos; + if (HasLower && HasUpper) { + return true; + } + // FIXME: There are other signals we could listen for. + // Some of these require inspecting the surroundings of the word as well. + // - mid-sentence Capitalization + // - markup like quotes / backticks / brackets / "\p" + // - word has a qualifier (foo::bar) + return false; +} + +bool tokenSurvivedPreprocessing(SourceLocation Loc, + const syntax::TokenBuffer &TB) { + auto WordExpandedTokens = + TB.expandedTokens(TB.sourceManager().getMacroArgExpandedLocation(Loc)); + return !WordExpandedTokens.empty(); +} + +} // namespace + +std::vector +locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index, + SourceLocation Loc, + const std::string &MainFilePath) { + const auto &SM = AST.getSourceManager(); + + // Get the raw word at the specified location. + unsigned Pos; + FileID File; + std::tie(File, Pos) = SM.getDecomposedLoc(Loc); + llvm::StringRef Code = SM.getBufferData(File); + llvm::StringRef Word = wordTouching(Code, Pos); + if (Word.empty()) + return {}; + unsigned WordOffset = Word.data() - Code.data(); + SourceLocation WordStart = SM.getComposedLoc(File, WordOffset); + + // Do not consider tokens that survived preprocessing. + // We are erring on the safe side here, as a user may expect to get + // accurate (as opposed to textual-heuristic) results for such tokens. + // FIXME: Relax this for dependent code. + if (tokenSurvivedPreprocessing(WordStart, AST.getTokens())) { + return {}; + } + + // Additionally filter for signals that the word is likely to be an + // identifier. This avoids triggering on e.g. random words in a comment. + if (!isLikelyToBeIdentifier(Word)) { + return {}; + } + + // Look up the selected word in the index. + FuzzyFindRequest Req; + Req.Query = Word.str(); + Req.ProximityPaths = {MainFilePath}; + Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts()); + // FIXME: For extra strictness, consider AnyScope=false. + Req.AnyScope = true; + // We limit the results to 3 further below. This limit is to avoid fetching + // too much data, while still likely having enough for 3 results to remain + // after additional filtering. + Req.Limit = 10; + bool TooMany = false; + using ScoredLocatedSymbol = std::pair; + std::vector ScoredResults; + Index->fuzzyFind(Req, [&](const Symbol &Sym) { + // Only consider exact name matches, including case. + // This is to avoid too many false positives. + // We could relax this in the future (e.g. to allow for typos) if we make + // the query more accurate by other means. + if (Sym.Name != Word) + return; + + // Exclude constructor results. They have the same name as the class, + // but we don't have enough context to prefer them over the class. + if (Sym.SymInfo.Kind == index::SymbolKind::Constructor) + return; + + auto MaybeDeclLoc = + indexToLSPLocation(Sym.CanonicalDeclaration, MainFilePath); + if (!MaybeDeclLoc) { + log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError()); + return; + } + Location DeclLoc = *MaybeDeclLoc; + Location DefLoc; + if (Sym.Definition) { + auto MaybeDefLoc = indexToLSPLocation(Sym.Definition, MainFilePath); + if (!MaybeDefLoc) { + log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError()); + return; + } + DefLoc = *MaybeDefLoc; + } + + if (ScoredResults.size() >= 3) { + // Assume we don't have results from the current file, otherwise the + // findNearbyIdentifier() mechanism would have handled them. + // If we have more than 3 results, and none from the current file, don't + // return anything, as confidence is too low. + // FIXME: Alternatively, try a stricter query? + TooMany = true; + return; + } + + LocatedSymbol Located; + Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str(); + Located.PreferredDeclaration = bool(Sym.Definition) ? DefLoc : DeclLoc; + Located.Definition = DefLoc; + + SymbolQualitySignals Quality; + Quality.merge(Sym); + SymbolRelevanceSignals Relevance; + Relevance.Name = Sym.Name; + Relevance.Query = SymbolRelevanceSignals::Generic; + Relevance.merge(Sym); + auto Score = + evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate()); + dlog("locateSymbolNamedTextuallyAt: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, + Sym.Name, Score, Quality, Relevance); + + ScoredResults.push_back({Score, std::move(Located)}); + }); + + if (TooMany) + return {}; + + llvm::sort(ScoredResults, + [](const ScoredLocatedSymbol &A, const ScoredLocatedSymbol &B) { + return A.first > B.first; + }); + std::vector Results; + for (auto &Res : std::move(ScoredResults)) + Results.push_back(std::move(Res.second)); + return Results; +} + std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const auto &SM = AST.getSourceManager(); @@ -346,8 +504,12 @@ // expansion.) return {*std::move(Macro)}; - return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, - Index); + auto ASTResults = + locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, Index); + if (!ASTResults.empty()) + return ASTResults; + + return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath); } std::vector getDocumentLinks(ParsedAST &AST) { diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -38,6 +38,7 @@ namespace { using ::testing::ElementsAre; +using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::UnorderedElementsAreArray; @@ -353,7 +354,7 @@ R"cpp(// Symbol concatenated inside macro (not supported) int *pi; #define POINTER(X) p ## X; - int i = *POINTER(^i); + int x = *POINTER(^i); )cpp", R"cpp(// Forward class declaration @@ -606,6 +607,81 @@ } } +TEST(LocateSymbol, TextualSmoke) { + auto T = Annotations( + R"cpp( + struct [[MyClass]] {}; + // Comment mentioning M^yClass + )cpp"); + + auto TU = TestTU::withCode(T.code()); + auto AST = TU.build(); + auto Index = TU.index(); + EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()), + ElementsAre(Sym("MyClass", T.range()))); +} + +TEST(LocateSymbol, Textual) { + const char *Tests[] = { + R"cpp(// Comment + struct [[MyClass]] {}; + // Comment mentioning M^yClass + )cpp", + R"cpp(// String + struct [[MyClass]] {}; + const char* s = "String literal mentioning M^yClass"; + )cpp", + R"cpp(// Ifdef'ed out code + struct [[MyClass]] {}; + #ifdef WALDO + M^yClass var; + #endif + )cpp", + R"cpp(// Macro definition + struct [[MyClass]] {}; + #define DECLARE_MYCLASS_OBJ(name) M^yClass name; + )cpp", + R"cpp(// Invalid code + /*error-ok*/ + int myFunction(int); + // Not triggered for token which survived preprocessing. + int var = m^yFunction(); + )cpp", + R"cpp(// Dependent type + struct Foo { + void uniqueMethodName(); + }; + template + void f(T t) { + // Not triggered for token which survived preprocessing. + t->u^niqueMethodName(); + } + )cpp"}; + + for (const char *Test : Tests) { + Annotations T(Test); + llvm::Optional WantDecl; + if (!T.ranges().empty()) + WantDecl = T.range(); + + auto TU = TestTU::withCode(T.code()); + + auto AST = TU.build(); + auto Index = TU.index(); + auto Results = locateSymbolNamedTextuallyAt( + AST, Index.get(), + cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())), + testPath(TU.Filename)); + + if (!WantDecl) { + EXPECT_THAT(Results, IsEmpty()) << Test; + } else { + ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test; + EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test; + } + } +} + TEST(LocateSymbol, Ambiguous) { auto T = Annotations(R"cpp( struct Foo { @@ -680,6 +756,30 @@ Sym("baz", T.range("StaticOverload2")))); } +TEST(LocateSymbol, TextualAmbiguous) { + auto T = Annotations(R"cpp( + struct Foo { + void $FooLoc[[uniqueMethodName]](); + }; + struct Bar { + void $BarLoc[[uniqueMethodName]](); + }; + // Will call u^niqueMethodName() on t. + template + void f(T t); + )cpp"); + auto TU = TestTU::withCode(T.code()); + auto AST = TU.build(); + auto Index = TU.index(); + auto Results = locateSymbolNamedTextuallyAt( + AST, Index.get(), + cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())), + testPath(TU.Filename)); + EXPECT_THAT(Results, + UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")), + Sym("uniqueMethodName", T.range("BarLoc")))); +} + TEST(LocateSymbol, TemplateTypedefs) { auto T = Annotations(R"cpp( template struct function {};