diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -68,7 +68,7 @@ /// FIXME: This should return an error if the location is invalid. Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc); -/// Returns the taken range at \p TokLoc. +/// Returns the token range at \p TokLoc. llvm::Optional getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc); @@ -86,6 +86,13 @@ const SourceManager &SM, const LangOptions &LangOpts); +/// Get the source range of the raw word at a specified \p Pos in the main file. +/// This is similar to the token at the specified position, but for positions +/// inside comments and strings, it only returns a single word rather than +/// the entire comment or string token. +SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM, + const LangOptions &LangOpts); + /// Returns true iff \p Loc is inside the main file. This function handles /// file & macro locations. For macro locations, returns iff the macro is being /// expanded inside the main file. diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -272,6 +272,42 @@ return Other; } +SourceLocation getRawWordBegin(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID()); + FileID FID; + unsigned Offset; + std::tie(FID, Offset) = SM.getDecomposedLoc(Loc); + unsigned Start = Offset; + while (Start > 0 && Lexer::isIdentifierBodyChar(Buf[Start - 1], LangOpts)) { + --Start; + } + return SM.getComposedLoc(FID, Start); +} + +SourceLocation getRawWordEnd(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID()); + FileID FID; + unsigned Offset; + std::tie(FID, Offset) = SM.getDecomposedLoc(Loc); + unsigned End = Offset; + while (End < Buf.size() && Lexer::isIdentifierBodyChar(Buf[End], LangOpts)) + ++End; + return SM.getComposedLoc(FID, End); +} + +SourceLocation getEndOfIdentifier(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + if (!Loc.isValid()) + return SourceLocation{}; + bool Raw = getTokenFlavor(Loc, SM, LangOpts) == Other; + if (Raw) { + return getRawWordEnd(Loc, SM, LangOpts); + } + return Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); +} + } // namespace SourceLocation getBeginningOfIdentifier(const Position &Pos, @@ -311,8 +347,9 @@ // For interesting token, we return the beginning of the token. if (CurrentKind == Identifier || CurrentKind == Operator) return CurrentTokBeginning; - // otherwise, we return the original loc. - return InputLoc; + // Otherwise, return the beginning of the raw word at the given position. + // This facilitates selecting identifiers in comments and strings. + return getRawWordBegin(InputLoc, SM, LangOpts); } // Whitespace is not interesting. @@ -337,6 +374,13 @@ return InputLoc; } +SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation Begin = getBeginningOfIdentifier(Pos, SM, LangOpts); + SourceLocation End = getEndOfIdentifier(Begin, SM, LangOpts); + return {Begin, End}; +} + bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { if (!R.getBegin().isValid() || !R.getEnd().isValid()) return false; diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -10,9 +10,11 @@ #include "CodeCompletionStrings.h" #include "FindSymbols.h" #include "FindTarget.h" +#include "FuzzyMatch.h" #include "Logger.h" #include "ParsedAST.h" #include "Protocol.h" +#include "Quality.h" #include "Selection.h" #include "SourceCode.h" #include "URI.h" @@ -45,6 +47,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" +#include namespace clang { namespace clangd { @@ -191,6 +194,116 @@ return Result; } +bool isLikelyToBeIdentifier(StringRef Word) { + // Word contains underscore + if (Word.contains('_')) { + return true; + } + // Word contains capital letter other than at beginning + if (Word.substr(1).find_if([](char C) { return std::isupper(C); }) != + StringRef::npos) { + return true; + } + // FIXME: There are other signals we could listen for. + // Some of these require inspecting the surroundings of the word as well. + // - mid-sentence Capitalization + // - markup like quotes / backticks / brackets / "\p" + // - word is used as an identifier in nearby token (very close if very + // short, anywhere in file if longer) + // - word has a qualifier (foo::bar) + return false; +} + +using ScoredLocatedSymbol = std::pair; +struct ScoredSymbolGreater { + bool operator()(const ScoredLocatedSymbol &L, const ScoredLocatedSymbol &R) { + if (L.first != R.first) + return L.first > R.first; + return L.second.Name < R.second.Name; // Earlier name is better. + } +}; + +std::vector navigationFallback(ParsedAST &AST, + const SymbolIndex *Index, + Position Pos, + const std::string &MainFilePath) { + const auto &SM = AST.getSourceManager(); + auto SourceRange = getWordAtPosition(Pos, SM, AST.getLangOpts()); + auto QueryString = toSourceCode(SM, SourceRange); + if (!isLikelyToBeIdentifier(QueryString)) { + return {}; + } + + FuzzyFindRequest Req; + Req.Query = QueryString.str(); + Req.ProximityPaths = {MainFilePath}; + // FIXME: Set Req.Scopes to the lexically enclosing scopes. + // For extra strictness, consider AnyScope=false. + Req.AnyScope = true; + // Choose a limit that's large enough that it contains the user's desired + // target even in the presence of some false positives, but small enough that + // it doesn't generate too much noise. + Req.Limit = 5; + TopN Top(*Req.Limit); + FuzzyMatcher Filter(Req.Query); + auto MainFileURI = URIForFile::canonicalize(MainFilePath, MainFilePath); + bool HaveResultInMainFile = false; + Index->fuzzyFind(Req, [&](const Symbol &Sym) { + auto Loc = symbolToLocation(Sym, MainFilePath); + if (!Loc) { + log("Navigation fallback: {0}", Loc.takeError()); + return; + } + + // For now, only consider exact name matches, including case. + // This is to avoid too many false positives. + // We could relax this in the future if we make the query more accurate + // by other means. + if (Sym.Name != QueryString) + return; + + if (Loc->uri == MainFileURI) + HaveResultInMainFile = true; + + std::string Scope = std::string(Sym.Scope); + llvm::StringRef ScopeRef = Scope; + ScopeRef.consume_back("::"); + LocatedSymbol Located; + Located.Name = (Sym.Name + Sym.TemplateSpecializationArgs).str(); + Located.PreferredDeclaration = *Loc; + // FIXME: Populate Definition? + + SymbolQualitySignals Quality; + Quality.merge(Sym); + SymbolRelevanceSignals Relevance; + Relevance.Name = Sym.Name; + Relevance.Query = SymbolRelevanceSignals::Generic; + if (auto NameMatch = Filter.match(Sym.Name)) + Relevance.NameMatch = *NameMatch; + else { + log("Navigation fallback: {0} didn't match query {1}", Sym.Name, + Filter.pattern()); + return; + } + Relevance.merge(Sym); + auto Score = + evaluateSymbolAndRelevance(Quality.evaluate(), Relevance.evaluate()); + dlog("Navigation fallback: {0}{1} = {2}\n{3}{4}\n", Sym.Scope, Sym.Name, + Score, Quality, Relevance); + + Top.push({Score, std::move(Located)}); + }); + std::vector Result; + for (auto &Res : std::move(Top).items()) + Result.push_back(std::move(Res.second)); + // If we have more than 3 results, and none from the current file, don't + // return anything, as confidence is too low. + // FIXME: Alternatively, try a stricter query? + if (Result.size() > 3 && !HaveResultInMainFile) + return {}; + return Result; +} + std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const auto &SM = AST.getSourceManager(); @@ -341,6 +454,10 @@ }); } + if (Result.empty()) { + return navigationFallback(AST, Index, Pos, *MainFilePath); + } + return Result; } diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -586,6 +586,52 @@ } } +TEST(LocateSymbol, Textual) { + const char *Tests[] = { + R"cpp(// Comment + struct [[MyClass]] {}; + // Comment mentioning M^yClass + )cpp", + R"cpp(// String + struct [[MyClass]] {}; + const char* s = "String literal mentioning M^yClass"; + )cpp", + R"cpp(// Invalid code + /*error-ok*/ + int [[myFunction]](int); + int var = m^yFunction(); + )cpp", + R"cpp(// Dependent type + struct Foo { + void [[uniqueMethodName]](); + }; + template + void f(T t) { + t->u^niqueMethodName(); + } + )cpp"}; + + for (const char *Test : Tests) { + Annotations T(Test); + llvm::Optional WantDecl; + if (!T.ranges().empty()) + WantDecl = T.range(); + + auto TU = TestTU::withCode(T.code()); + + auto AST = TU.build(); + auto Index = TU.index(); + auto Results = locateSymbolAt(AST, T.point(), Index.get()); + + if (!WantDecl) { + EXPECT_THAT(Results, IsEmpty()) << Test; + } else { + ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test; + EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test; + } + } +} + TEST(LocateSymbol, Ambiguous) { auto T = Annotations(R"cpp( struct Foo { @@ -660,6 +706,27 @@ Sym("baz", T.range("StaticOverload2")))); } +TEST(LocateSymbol, TextualAmbiguous) { + auto T = Annotations(R"cpp( + struct Foo { + void $FooLoc[[uniqueMethodName]](); + }; + struct Bar { + void $BarLoc[[uniqueMethodName]](); + }; + template + void f(T t) { + t->u^niqueMethodName(); + } + )cpp"); + auto TU = TestTU::withCode(T.code()); + auto AST = TU.build(); + auto Index = TU.index(); + EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()), + UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")), + Sym("uniqueMethodName", T.range("BarLoc")))); +} + TEST(LocateSymbol, TemplateTypedefs) { auto T = Annotations(R"cpp( template struct function {};