diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -68,7 +68,7 @@ /// FIXME: This should return an error if the location is invalid. Position sourceLocToPosition(const SourceManager &SM, SourceLocation Loc); -/// Returns the taken range at \p TokLoc. +/// Returns the token range at \p TokLoc. llvm::Optional getTokenRange(const SourceManager &SM, const LangOptions &LangOpts, SourceLocation TokLoc); @@ -86,6 +86,13 @@ const SourceManager &SM, const LangOptions &LangOpts); +/// Get the source range of the raw word at a specified \p Pos in the main file. +/// This is similar to the token at the specified position, but for positions +/// inside comments and strings, it only returns a single word rather than +/// the entire comment or string token. +SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM, + const LangOptions &LangOpts); + /// Returns true iff \p Loc is inside the main file. This function handles /// file & macro locations. For macro locations, returns iff the macro is being /// expanded inside the main file. diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -272,6 +272,42 @@ return Other; } +SourceLocation getRawWordBegin(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID()); + FileID FID; + unsigned Offset; + std::tie(FID, Offset) = SM.getDecomposedLoc(Loc); + unsigned Start = Offset; + while (Start > 0 && Lexer::isIdentifierBodyChar(Buf[Start - 1], LangOpts)) { + --Start; + } + return SM.getComposedLoc(FID, Start); +} + +SourceLocation getRawWordEnd(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + llvm::StringRef Buf = SM.getBufferData(SM.getMainFileID()); + FileID FID; + unsigned Offset; + std::tie(FID, Offset) = SM.getDecomposedLoc(Loc); + unsigned End = Offset; + while (End < Buf.size() && Lexer::isIdentifierBodyChar(Buf[End], LangOpts)) + ++End; + return SM.getComposedLoc(FID, End); +} + +SourceLocation getEndOfIdentifier(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + if (!Loc.isValid()) + return SourceLocation{}; + bool Raw = getTokenFlavor(Loc, SM, LangOpts) == Other; + if (Raw) { + return getRawWordEnd(Loc, SM, LangOpts); + } + return Lexer::getLocForEndOfToken(Loc, 0, SM, LangOpts); +} + } // namespace SourceLocation getBeginningOfIdentifier(const Position &Pos, @@ -311,8 +347,9 @@ // For interesting token, we return the beginning of the token. if (CurrentKind == Identifier || CurrentKind == Operator) return CurrentTokBeginning; - // otherwise, we return the original loc. - return InputLoc; + // Otherwise, return the beginning of the raw word at the given position. + // This facilitates selecting identifiers in comments and strings. + return getRawWordBegin(InputLoc, SM, LangOpts); } // Whitespace is not interesting. @@ -337,6 +374,13 @@ return InputLoc; } +SourceRange getWordAtPosition(const Position &Pos, const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation Begin = getBeginningOfIdentifier(Pos, SM, LangOpts); + SourceLocation End = getEndOfIdentifier(Begin, SM, LangOpts); + return {Begin, End}; +} + bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { if (!R.getBegin().isValid() || !R.getEnd().isValid()) return false; @@ -793,97 +837,99 @@ NamespaceEvent Event; lex(Code, format::getFormattingLangOpts(Style), - [&](const clang::Token &Tok,const SourceManager &SM) { - Event.Pos = sourceLocToPosition(SM, Tok.getLocation()); - switch (Tok.getKind()) { - case tok::raw_identifier: - // In raw mode, this could be a keyword or a name. - switch (State) { - case UsingNamespace: - case UsingNamespaceName: - NSName.append(Tok.getRawIdentifier()); - State = UsingNamespaceName; - break; - case Namespace: - case NamespaceName: - NSName.append(Tok.getRawIdentifier()); - State = NamespaceName; - break; - case Using: - State = - (Tok.getRawIdentifier() == "namespace") ? UsingNamespace : Default; - break; - case Default: - NSName.clear(); - if (Tok.getRawIdentifier() == "namespace") - State = Namespace; - else if (Tok.getRawIdentifier() == "using") - State = Using; - break; - } - break; - case tok::coloncolon: - // This can come at the beginning or in the middle of a namespace name. - switch (State) { - case UsingNamespace: - case UsingNamespaceName: - NSName.append("::"); - State = UsingNamespaceName; - break; - case NamespaceName: - NSName.append("::"); - State = NamespaceName; - break; - case Namespace: // Not legal here. - case Using: - case Default: - State = Default; - break; - } - break; - case tok::l_brace: - // Record which { started a namespace, so we know when } ends one. - if (State == NamespaceName) { - // Parsed: namespace { - BraceStack.push_back(true); - Enclosing.push_back(NSName); - Event.Trigger = NamespaceEvent::BeginNamespace; - Event.Payload = llvm::join(Enclosing, "::"); - Callback(Event); - } else { - // This case includes anonymous namespaces (State = Namespace). - // For our purposes, they're not namespaces and we ignore them. - BraceStack.push_back(false); - } - State = Default; - break; - case tok::r_brace: - // If braces are unmatched, we're going to be confused, but don't crash. - if (!BraceStack.empty()) { - if (BraceStack.back()) { - // Parsed: } // namespace - Enclosing.pop_back(); - Event.Trigger = NamespaceEvent::EndNamespace; - Event.Payload = llvm::join(Enclosing, "::"); - Callback(Event); + [&](const clang::Token &Tok, const SourceManager &SM) { + Event.Pos = sourceLocToPosition(SM, Tok.getLocation()); + switch (Tok.getKind()) { + case tok::raw_identifier: + // In raw mode, this could be a keyword or a name. + switch (State) { + case UsingNamespace: + case UsingNamespaceName: + NSName.append(Tok.getRawIdentifier()); + State = UsingNamespaceName; + break; + case Namespace: + case NamespaceName: + NSName.append(Tok.getRawIdentifier()); + State = NamespaceName; + break; + case Using: + State = (Tok.getRawIdentifier() == "namespace") ? UsingNamespace + : Default; + break; + case Default: + NSName.clear(); + if (Tok.getRawIdentifier() == "namespace") + State = Namespace; + else if (Tok.getRawIdentifier() == "using") + State = Using; + break; + } + break; + case tok::coloncolon: + // This can come at the beginning or in the middle of a namespace + // name. + switch (State) { + case UsingNamespace: + case UsingNamespaceName: + NSName.append("::"); + State = UsingNamespaceName; + break; + case NamespaceName: + NSName.append("::"); + State = NamespaceName; + break; + case Namespace: // Not legal here. + case Using: + case Default: + State = Default; + break; + } + break; + case tok::l_brace: + // Record which { started a namespace, so we know when } ends one. + if (State == NamespaceName) { + // Parsed: namespace { + BraceStack.push_back(true); + Enclosing.push_back(NSName); + Event.Trigger = NamespaceEvent::BeginNamespace; + Event.Payload = llvm::join(Enclosing, "::"); + Callback(Event); + } else { + // This case includes anonymous namespaces (State = Namespace). + // For our purposes, they're not namespaces and we ignore them. + BraceStack.push_back(false); + } + State = Default; + break; + case tok::r_brace: + // If braces are unmatched, we're going to be confused, but don't + // crash. + if (!BraceStack.empty()) { + if (BraceStack.back()) { + // Parsed: } // namespace + Enclosing.pop_back(); + Event.Trigger = NamespaceEvent::EndNamespace; + Event.Payload = llvm::join(Enclosing, "::"); + Callback(Event); + } + BraceStack.pop_back(); + } + break; + case tok::semi: + if (State == UsingNamespaceName) { + // Parsed: using namespace ; + Event.Trigger = NamespaceEvent::UsingDirective; + Event.Payload = std::move(NSName); + Callback(Event); + } + State = Default; + break; + default: + State = Default; + break; } - BraceStack.pop_back(); - } - break; - case tok::semi: - if (State == UsingNamespaceName) { - // Parsed: using namespace ; - Event.Trigger = NamespaceEvent::UsingDirective; - Event.Payload = std::move(NSName); - Callback(Event); - } - State = Default; - break; - default: - State = Default; - break; - } - }); + }); } // Returns the prefix namespaces of NS: {"" ... NS}. diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -187,6 +187,32 @@ return Result; } +std::vector navigationFallback(ParsedAST &AST, + const SymbolIndex *Index, + Position Pos, + const std::string &MainFilePath) { + const auto &SM = AST.getSourceManager(); + auto SourceRange = getWordAtPosition(Pos, SM, AST.getLangOpts()); + auto QueryString = toSourceCode(SM, SourceRange); + // Choose a limit that's large enough that it contains the user's desired + // target even in the presence of some false positives, but small enough that + // it doesn't generate too much noise. + int Limit = 5; + auto Symbols = getWorkspaceSymbols(QueryString, Limit, Index, MainFilePath); + if (!Symbols) { + elog("Workspace symbols failed", Symbols.takeError()); + } + std::vector Result; + for (auto &Sym : *Symbols) { + LocatedSymbol Located; + Located.Name = Sym.name; + Located.PreferredDeclaration = Sym.location; + // TODO: Populate Definition? + Result.push_back(std::move(Located)); + } + return Result; +} + std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const auto &SM = AST.getSourceManager(); @@ -318,6 +344,10 @@ }); } + if (Result.empty()) { + return navigationFallback(AST, Index, Pos, *MainFilePath); + } + return Result; } diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -585,6 +585,51 @@ } } +TEST(LocateSymbol, Textual) { + const char *Tests[] = { + R"cpp(// Comment + struct [[Foo]] {}; + // Comment mentioning F^oo + )cpp", + R"cpp(// String + struct [[Foo]] {}; + const char* = "String literal mentioning F^oo"; + )cpp", + R"cpp(// Invalid code + int [[foo]](int); + int var = f^oo(); + )cpp", + R"cpp(// Dependent type + struct Foo { + void [[uniqueMethodName]](); + }; + template + void f(T t) { + t->u^niqueMethodName(); + } + )cpp"}; + + for (const char *Test : Tests) { + Annotations T(Test); + llvm::Optional WantDecl; + if (!T.ranges().empty()) + WantDecl = T.range(); + + auto TU = TestTU::withCode(T.code()); + + auto AST = TU.build(); + auto Index = TU.index(); + auto Results = locateSymbolAt(AST, T.point(), Index.get()); + + if (!WantDecl) { + EXPECT_THAT(Results, IsEmpty()) << Test; + } else { + ASSERT_THAT(Results, ::testing::SizeIs(1)) << Test; + EXPECT_EQ(Results[0].PreferredDeclaration.range, *WantDecl) << Test; + } + } +} + TEST(LocateSymbol, Ambiguous) { auto T = Annotations(R"cpp( struct Foo { @@ -659,6 +704,27 @@ Sym("baz", T.range("StaticOverload2")))); } +TEST(LocateSymbol, TextualAmbiguous) { + auto T = Annotations(R"cpp( + struct Foo { + void $FooLoc[[uniqueMethodName]](); + }; + struct Bar { + void $BarLoc[[uniqueMethodName]](); + }; + template + void f(T t) { + t->u^niqueMethodName(); + } + )cpp"); + auto TU = TestTU::withCode(T.code()); + auto AST = TU.build(); + auto Index = TU.index(); + EXPECT_THAT(locateSymbolAt(AST, T.point(), Index.get()), + UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")), + Sym("uniqueMethodName", T.range("BarLoc")))); +} + TEST(LocateSymbol, TemplateTypedefs) { auto T = Annotations(R"cpp( template struct function {};