diff --git a/clang-tools-extra/clangd/FindSymbols.h b/clang-tools-extra/clangd/FindSymbols.h --- a/clang-tools-extra/clangd/FindSymbols.h +++ b/clang-tools-extra/clangd/FindSymbols.h @@ -21,6 +21,10 @@ class ParsedAST; class SymbolIndex; +/// Helper function for deriving an LSP Location from a SymbolLocation. +llvm::Expected symbolLocationToLocation(const SymbolLocation &Loc, + llvm::StringRef HintPath); + /// Helper function for deriving an LSP Location for a Symbol. llvm::Expected symbolToLocation(const Symbol &Sym, llvm::StringRef HintPath); diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -18,6 +18,7 @@ #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Index/IndexingAction.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/ScopedPrinter.h" @@ -39,15 +40,13 @@ } // namespace -llvm::Expected symbolToLocation(const Symbol &Sym, - llvm::StringRef HintPath) { - // Prefer the definition over e.g. a function declaration in a header - auto &CD = Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration; - auto Path = URI::resolve(CD.FileURI, HintPath); +llvm::Expected symbolLocationToLocation(const SymbolLocation &Loc, + llvm::StringRef HintPath) { + auto Path = URI::resolve(Loc.FileURI, HintPath); if (!Path) { return llvm::make_error( - formatv("Could not resolve path for symbol '{0}': {1}", - Sym.Name, llvm::toString(Path.takeError())), + llvm::formatv("Could not resolve path for file '{0}': {1}", Loc.FileURI, + llvm::toString(Path.takeError())), llvm::inconvertibleErrorCode()); } Location L; @@ -55,14 +54,21 @@ // request. L.uri = URIForFile::canonicalize(*Path, HintPath); Position Start, End; - Start.line = CD.Start.line(); - Start.character = CD.Start.column(); - End.line = CD.End.line(); - End.character = CD.End.column(); + Start.line = Loc.Start.line(); + Start.character = Loc.Start.column(); + End.line = Loc.End.line(); + End.character = Loc.End.column(); L.range = {Start, End}; return L; } +llvm::Expected symbolToLocation(const Symbol &Sym, + llvm::StringRef HintPath) { + // Prefer the definition over e.g. a function declaration in a header + return symbolLocationToLocation( + Sym.Definition ? Sym.Definition : Sym.CanonicalDeclaration, HintPath); +} + llvm::Expected> getWorkspaceSymbols(llvm::StringRef Query, int Limit, const SymbolIndex *const Index, llvm::StringRef HintPath) { diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h --- a/clang-tools-extra/clangd/XRefs.h +++ b/clang-tools-extra/clangd/XRefs.h @@ -60,6 +60,21 @@ const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc, const syntax::TokenBuffer &TB); +// Tries to provide a textual fallback for locating a symbol referenced at +// a location, by looking up the word under the cursor as a symbol name in the +// index. The aim is to pick up references to symbols in contexts where +// AST-based resolution does not work, such as comments, strings, and PP +// disabled regions. The implementation takes a number of measures to avoid +// false positives, such as looking for some signal that the word at the +// given location is likely to be an identifier. The function does not +// currently return results for locations that end up as real expanded +// tokens, although this may be relaxed for e.g. dependent code in the future. +// (This is for internal use by locateSymbolAt, and is exposed for testing). +std::vector +locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index, + SourceLocation Loc, + const std::string &MainFilePath); + /// Get all document links std::vector getDocumentLinks(ParsedAST &AST); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -10,9 +10,11 @@ #include "CodeCompletionStrings.h" #include "FindSymbols.h" #include "FindTarget.h" +#include "FuzzyMatch.h" #include "Logger.h" #include "ParsedAST.h" #include "Protocol.h" +#include "Quality.h" #include "Selection.h" #include "SourceCode.h" #include "URI.h" @@ -49,6 +51,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang { namespace clangd { @@ -369,7 +372,7 @@ // Updates BestTok and BestCost if Tok is a good candidate. // May return true if the cost is too high for this token. auto Consider = [&](const syntax::Token &Tok) { - if(!(Tok.kind() == tok::identifier && Tok.text(SM) == Word)) + if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word)) return false; // No point guessing the same location we started with. if (Tok.location() == WordStart) @@ -412,6 +415,145 @@ return BestTok; } +static bool isLikelyToBeIdentifier(StringRef Word) { + // Word contains underscore. + // This handles things like snake_case and MACRO_CASE. + if (Word.contains('_')) { + return true; + } + // Word contains capital letter other than at beginning. + // This handles things like lowerCamel and UpperCamel. + // The check for also containing a lowercase letter is to rule out + // initialisms like "HTTP". + bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos; + bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos; + if (HasLower && HasUpper) { + return true; + } + // FIXME: There are other signals we could listen for. + // Some of these require inspecting the surroundings of the word as well. + // - mid-sentence Capitalization + // - markup like quotes / backticks / brackets / "\p" + // - word has a qualifier (foo::bar) + return false; +} + +using ScoredLocatedSymbol = std::pair; +struct ScoredSymbolGreater { + bool operator()(const ScoredLocatedSymbol &L, + const ScoredLocatedSymbol &R) const { + return L.first > R.first; + } +}; + +std::vector +locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index, + SourceLocation Loc, + const std::string &MainFilePath) { + const auto &SM = AST.getSourceManager(); + FileID File; + unsigned Pos; + std::tie(File, Pos) = SM.getDecomposedLoc(Loc); + llvm::StringRef Code = SM.getBufferData(File); + auto QueryString = wordTouching(Code, Pos); + if (!isLikelyToBeIdentifier(QueryString)) { + return {}; + } + + // If this is a real token that survived preprocessing, don't + // use the textual heuristic. This is to avoid false positives + // when over tokens that happen to correspond to an identifier + // name elsewhere. + // FIXME: Relax this for dependent code. + unsigned WordOffset = QueryString.data() - Code.data(); + SourceLocation WordStart = SM.getComposedLoc(File, WordOffset); + // If this is a real token that survived preprocessing, don't use heuristics. + auto WordExpandedTokens = + AST.getTokens().expandedTokens(SM.getMacroArgExpandedLocation(WordStart)); + if (!WordExpandedTokens.empty()) + return {}; + + FuzzyFindRequest Req; + Req.Query = QueryString.str(); + Req.ProximityPaths = {MainFilePath}; + Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts()); + // FIXME: For extra strictness, consider AnyScope=false. + Req.AnyScope = true; + // We limit the results to 3 further below. This limit is to avoid fetching + // too much data, while still likely having enough for 3 results to remain + // after additional filtering. + Req.Limit = 10; + TopN Top(*Req.Limit); + FuzzyMatcher Filter(Req.Query); + Index->fuzzyFind(Req, [&](const Symbol &Sym) { + auto MaybeDeclLoc = + symbolLocationToLocation(Sym.CanonicalDeclaration, MainFilePath); + if (!MaybeDeclLoc) { + log("locateSymbolNamedTextuallyAt: {0}", MaybeDeclLoc.takeError()); + return; + } + Location DeclLoc = *MaybeDeclLoc; + Location DefLoc; + if (Sym.Definition) { + auto MaybeDefLoc = symbolLocationToLocation(Sym.Definition, MainFilePath); + if (!MaybeDefLoc) { + log("locateSymbolNamedTextuallyAt: {0}", MaybeDefLoc.takeError()); + return; + } + DefLoc = *MaybeDefLoc; + } + Location PreferredLoc = bool(Sym.Definition) ? DefLoc : DeclLoc; + + // For now, only consider exact name matches, including case. + // This is to avoid too many false positives. + // We could relax this in the future if we make the query more accurate + // by other means. + if (Sym.Name != QueryString) + return; + + // Exclude constructor results. They have the same name as the class, + // but we don't have enough context to prefer them over the class. + if (Sym.SymInfo.Kind == index::SymbolKind::Constructor) + return; + + std::string Scope = std::string(Sym.Scope); + llvm::StringRef ScopeRef = Scope; + ScopeRef.consume_back("::"); + LocatedSymbol Located; + Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str(); + Located.PreferredDeclaration = DeclLoc; + Located.Definition = DefLoc; + + SymbolQualitySignals Quality; + Quality.merge(Sym); + SymbolRelevanceSignals Relevance; + Relevance.Name = Sym.Name; + Relevance.Query = SymbolRelevanceSignals::Generic; + if (auto NameMatch = Filter.match(Sym.Name)) + Relevance.NameMatch = *NameMatch; + else + return; + Relevance.merge(Sym); + auto Score = + evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate()); + dlog("locateSymbolNamedTextuallyAt: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, + Sym.Name, Score, Quality, Relevance); + + Top.push({Score, std::move(Located)}); + }); + std::vector Result; + for (auto &Res : std::move(Top).items()) + Result.push_back(std::move(Res.second)); + // Assume we don't have results from the current file, otherwise the + // findNearbyIdentifier() mechanism would have handled them. + // If we have more than 3 results, and none from the current file, don't + // return anything, as confidence is too low. + // FIXME: Alternatively, try a stricter query? + if (Result.size() > 3) + return {}; + return Result; +} + std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const auto &SM = AST.getSourceManager(); @@ -457,7 +599,7 @@ return ASTResults; } - return {}; + return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath); } std::vector getDocumentLinks(ParsedAST &AST) { diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -596,6 +596,44 @@ } } +TEST(LocateSymbol, Textual) { + const char *Tests[] = { + R"cpp(// Comment + struct [[MyClass]] {}; + // Comment mentioning M^yClass + )cpp", + R"cpp(// String + struct [[MyClass]] {}; + const char* s = "String literal mentioning M^yClass"; + )cpp", + R"cpp(// Ifdef'ed out code + struct [[MyClass]] {}; + #ifdef WALDO + M^yClass var; + #endif + )cpp"}; + + for (const char *Test : Tests) { + Annotations T(Test); + llvm::Optional WantDecl; + if (!T.ranges().empty()) + WantDecl = T.range(); + + auto TU = TestTU::withCode(T.code()); + + auto AST = TU.build(); + auto Index = TU.index(); + auto Results = locateSymbolAt(AST, T.point(), Index.get()); + + if (!WantDecl) { + EXPECT_THAT(Results, IsEmpty()) << Test; + } else { + ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test; + EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test; + } + } +} + TEST(LocateSymbol, Ambiguous) { auto T = Annotations(R"cpp( struct Foo { @@ -670,6 +708,26 @@ Sym("baz", T.range("StaticOverload2")))); } +TEST(LocateSymbol, TextualAmbiguous) { + auto T = Annotations(R"cpp( + struct Foo { + void $FooLoc[[uniqueMethodName]](); + }; + struct Bar { + void $BarLoc[[uniqueMethodName]](); + }; + // Will call u^niqueMethodName() on t. + template + void f(T t); + )cpp"); + auto TU = TestTU::withCode(T.code()); + auto AST = TU.build(); + auto Index = TU.index(); + EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()), + UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")), + Sym("uniqueMethodName", T.range("BarLoc")))); +} + TEST(LocateSymbol, TemplateTypedefs) { auto T = Annotations(R"cpp( template struct function {}; @@ -869,37 +927,37 @@ TEST(LocateSymbol, NearbyIdentifier) { const char *Tests[] = { - R"cpp( + R"cpp( // regular identifiers (won't trigger) int hello; int y = he^llo; )cpp", - R"cpp( + R"cpp( // disabled preprocessor sections int [[hello]]; #if 0 int y = ^hello; #endif )cpp", - R"cpp( + R"cpp( // comments // he^llo, world int [[hello]]; )cpp", - R"cpp( + R"cpp( // string literals int [[hello]]; const char* greeting = "h^ello, world"; )cpp", - R"cpp( + R"cpp( // can refer to macro invocations (even if they expand to nothing) #define INT int [[INT]] x; // I^NT )cpp", - R"cpp( + R"cpp( // prefer nearest occurrence int hello; int x = hello; @@ -908,12 +966,12 @@ int z = hello; )cpp", - R"cpp( + R"cpp( // short identifiers find near results int [[hi]]; // h^i )cpp", - R"cpp( + R"cpp( // short identifiers don't find far results int hi; @@ -922,13 +980,13 @@ // h^i )cpp", }; - for (const char* Test : Tests) { + for (const char *Test : Tests) { Annotations T(Test); auto AST = TestTU::withCode(T.code()).build(); const auto &SM = AST.getSourceManager(); llvm::Optional Nearby; - if (const auto*Tok = findNearbyIdentifier( - cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens())) + if (const auto *Tok = findNearbyIdentifier( + cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens())) Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange( Tok->location(), Tok->endLocation())); if (T.ranges().empty())