diff --git a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -216,6 +216,35 @@ /// - drops stopwords like "get" and "for" llvm::StringSet<> collectWords(llvm::StringRef Content); +// Something that looks like a word in the source code. +// Could be a "real" token that's "live" in the AST, a spelled token consumed by +// the preprocessor, or part of a spelled token (e.g. word in a comment). +struct SpelledWord { + // (Spelling) location of the start of the word. + SourceLocation Location; + // The range of the word itself, excluding any quotes. + // This is a subrange of the file buffer. + llvm::StringRef Text; + // Whether this word is likely to refer to an identifier. True if: + // - the word is a spelled identifier token + // - Text is identifier-like (e.g. "foo_bar") + // - Text is surrounded by backticks (e.g. Foo in "// returns `Foo`") + bool LikelyIdentifier = false; + // Set if the word is contained in a token spelled in the file. + // (This should always be true, but comments aren't retained by TokenBuffer). + const syntax::Token *PartOfSpelledToken = nullptr; + // Set if the word is exactly a token spelled in the file. + const syntax::Token *SpelledToken = nullptr; + // Set if the word is a token spelled in the file, and that token survives + // preprocessing to emit an expanded token spelled the same way. + const syntax::Token *ExpandedToken = nullptr; + + // Find the unique word that contains SpelledLoc or starts/ends there. + static llvm::Optional touching(SourceLocation SpelledLoc, + const syntax::TokenBuffer &TB, + const LangOptions &LangOpts); +}; + /// Heuristically determine namespaces visible at a point, without parsing Code. /// This considers using-directives and enclosing namespace-declarations that /// are visible (and not obfuscated) in the file itself (not headers). 
diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -855,6 +855,96 @@ return Result; } +static bool isLikelyIdentifier(llvm::StringRef Word, llvm::StringRef Before, + llvm::StringRef After) { + // `foo` is an identifier. + if (Before.endswith("`") && After.startswith("`")) + return true; + // In foo::bar, both foo and bar are identifiers. + if (Before.endswith("::") || After.startswith("::")) + return true; + // Doxygen tags like \c foo indicate identifiers. + // Don't search too far back. + // This duplicates clang's doxygen parser, revisit if it gets complicated. + Before = Before.take_back(100); // Don't search too far back. + auto Pos = Before.find_last_of("\\@"); + if (Pos != llvm::StringRef::npos) { + llvm::StringRef Tag = Before.substr(Pos + 1).rtrim(' '); + if (Tag == "p" || Tag == "c" || Tag == "class" || Tag == "tparam" || + Tag == "param" || Tag == "param[in]" || Tag == "param[out]" || + Tag == "param[in,out]" || Tag == "retval" || Tag == "throw" || + Tag == "throws" || Tag == "link") + return true; + } + + // Word contains underscore. + // This handles things like snake_case and MACRO_CASE. + if (Word.contains('_')) { + return true; + } + // Word contains capital letter other than at beginning. + // This handles things like lowerCamel and UpperCamel. + // The check for also containing a lowercase letter is to rule out + // initialisms like "HTTP". + bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos; + bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos; + if (HasLower && HasUpper) { + return true; + } + // FIXME: consider mid-sentence Capitalization? 
+ return false; +} + +llvm::Optional SpelledWord::touching(SourceLocation SpelledLoc, + const syntax::TokenBuffer &TB, + const LangOptions &LangOpts) { + const auto &SM = TB.sourceManager(); + auto Touching = syntax::spelledTokensTouching(SpelledLoc, TB); + for (const auto &T : Touching) { + // If the token is an identifier or a keyword, don't use any heuristics. + if (tok::isAnyIdentifier(T.kind()) || tok::getKeywordSpelling(T.kind())) { + SpelledWord Result; + Result.Location = T.location(); + Result.Text = T.text(SM); + Result.LikelyIdentifier = tok::isAnyIdentifier(T.kind()); + Result.PartOfSpelledToken = &T; + Result.SpelledToken = &T; + auto Expanded = + TB.expandedTokens(SM.getMacroArgExpandedLocation(T.location())); + if (Expanded.size() == 1 && Expanded.front().text(SM) == Result.Text) + Result.ExpandedToken = &Expanded.front(); + return Result; + } + } + FileID File; + unsigned Offset; + std::tie(File, Offset) = SM.getDecomposedLoc(SpelledLoc); + bool Invalid = false; + llvm::StringRef Code = SM.getBufferData(File, &Invalid); + if (Invalid) + return llvm::None; + unsigned B = Offset, E = Offset; + while (B > 0 && isIdentifierBody(Code[B - 1])) + --B; + while (E < Code.size() && isIdentifierBody(Code[E])) + ++E; + if (B == E) + return llvm::None; + + SpelledWord Result; + Result.Location = SM.getComposedLoc(File, B); + Result.Text = Code.slice(B, E); + Result.LikelyIdentifier = + isLikelyIdentifier(Result.Text, Code.substr(0, B), Code.substr(E)) && + // should not be a keyword + tok::isAnyIdentifier( + IdentifierTable(LangOpts).get(Result.Text).getTokenID()); + for (const auto &T : Touching) + if (T.location() <= Result.Location) + Result.PartOfSpelledToken = &T; + return Result; +} + llvm::Optional locateMacroAt(const syntax::Token &SpelledTok, Preprocessor &PP) { SourceLocation Loc = SpelledTok.location(); diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h --- a/clang-tools-extra/clangd/XRefs.h +++ 
b/clang-tools-extra/clangd/XRefs.h @@ -16,6 +16,7 @@ #include "FormattedString.h" #include "Path.h" #include "Protocol.h" +#include "SourceCode.h" #include "index/Index.h" #include "index/SymbolLocation.h" #include "clang/AST/Type.h" @@ -26,6 +27,10 @@ #include namespace clang { +namespace syntax { +class Token; +class TokenBuffer; +} // namespace syntax namespace clangd { class ParsedAST; @@ -49,20 +54,22 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index = nullptr); -// Tries to provide a textual fallback for locating a symbol referenced at -// a location, by looking up the word under the cursor as a symbol name in the -// index. The aim is to pick up references to symbols in contexts where +// Tries to provide a textual fallback for locating a symbol by looking up the +// word under the cursor as a symbol name in the index. +// The aim is to pick up references to symbols in contexts where // AST-based resolution does not work, such as comments, strings, and PP -// disabled regions. The implementation takes a number of measures to avoid -// false positives, such as looking for some signal that the word at the -// given location is likely to be an identifier. The function does not -// currently return results for locations that end up as real expanded -// tokens, although this may be relaxed for e.g. dependent code in the future. +// disabled regions. // (This is for internal use by locateSymbolAt, and is exposed for testing). std::vector -locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index, - SourceLocation Loc, - const std::string &MainFilePath); +locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST, + const SymbolIndex *Index, + const std::string &MainFilePath); + +// Try to find a proximate occurrence of `Word` as an identifier, which can be +// used to resolve it. +// (This is for internal use by locateSymbolAt, and is exposed for testing). 
+const syntax::Token *findNearbyIdentifier(const SpelledWord &Word, + const syntax::TokenBuffer &TB); /// Get all document links std::vector getDocumentLinks(ParsedAST &AST); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -34,6 +34,7 @@ #include "clang/Basic/LangOptions.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Index/IndexDataConsumer.h" #include "clang/Index/IndexSymbol.h" #include "clang/Index/IndexingAction.h" @@ -48,6 +49,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -315,93 +317,44 @@ return Result; } -llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) { - unsigned B = Offset, E = Offset; - while (B > 0 && isIdentifierBody(Code[B - 1])) - --B; - while (E < Code.size() && isIdentifierBody(Code[E])) - ++E; - return Code.slice(B, E); +bool tokenSpelledAt(SourceLocation SpellingLoc, const syntax::TokenBuffer &TB) { + auto ExpandedTokens = TB.expandedTokens( + TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc)); + return !ExpandedTokens.empty(); } -bool isLikelyToBeIdentifier(StringRef Word) { - // Word contains underscore. - // This handles things like snake_case and MACRO_CASE. - if (Word.contains('_')) { - return true; - } - // Word contains capital letter other than at beginning. - // This handles things like lowerCamel and UpperCamel. - // The check for also containing a lowercase letter is to rule out - // initialisms like "HTTP". 
- bool HasLower = Word.find_if(clang::isLowercase) != StringRef::npos; - bool HasUpper = Word.substr(1).find_if(clang::isUppercase) != StringRef::npos; - if (HasLower && HasUpper) { - return true; - } - // FIXME: There are other signals we could listen for. - // Some of these require inspecting the surroundings of the word as well. - // - mid-sentence Capitalization - // - markup like quotes / backticks / brackets / "\p" - // - word has a qualifier (foo::bar) - return false; -} - -bool tokenSurvivedPreprocessing(SourceLocation Loc, - const syntax::TokenBuffer &TB) { - auto WordExpandedTokens = - TB.expandedTokens(TB.sourceManager().getMacroArgExpandedLocation(Loc)); - return !WordExpandedTokens.empty(); +llvm::StringRef sourcePrefix(SourceLocation Loc, const SourceManager &SM) { + auto D = SM.getDecomposedLoc(Loc); + bool Invalid = false; + llvm::StringRef Buf = SM.getBufferData(D.first, &Invalid); + if (Invalid || D.second > Buf.size()) + return ""; + return Buf.substr(0, D.second); } } // namespace std::vector -locateSymbolNamedTextuallyAt(ParsedAST &AST, const SymbolIndex *Index, - SourceLocation Loc, - const std::string &MainFilePath) { - const auto &SM = AST.getSourceManager(); - - // Get the raw word at the specified location. - unsigned Pos; - FileID File; - std::tie(File, Pos) = SM.getDecomposedLoc(Loc); - llvm::StringRef Code = SM.getBufferData(File); - llvm::StringRef Word = wordTouching(Code, Pos); - if (Word.empty()) - return {}; - unsigned WordOffset = Word.data() - Code.data(); - SourceLocation WordStart = SM.getComposedLoc(File, WordOffset); - - // Attempt to determine the kind of token that contains the word, - // and bail if it's a string literal. Note that we cannot always - // determine the token kind (e.g. comments, for which we do want - // to activate, are not retained by TokenBuffer). 
- for (syntax::Token T : - syntax::spelledTokensTouching(WordStart, AST.getTokens())) { - if (T.range(AST.getSourceManager()).touches(WordOffset + Word.size())) { - if (isStringLiteral(T.kind())) - return {}; - } - } - - // Do not consider tokens that survived preprocessing. - // We are erring on the safe side here, as a user may expect to get - // accurate (as opposed to textual-heuristic) results for such tokens. - // FIXME: Relax this for dependent code. - if (tokenSurvivedPreprocessing(WordStart, AST.getTokens())) +locateSymbolTextually(const SpelledWord &Word, ParsedAST &AST, + const SymbolIndex *Index, + const std::string &MainFilePath) { + // Don't use heuristics if this is a real identifier, or not an identifier. + if (Word.ExpandedToken || !Word.LikelyIdentifier || !Index) return {}; - - // Additionally filter for signals that the word is likely to be an - // identifier. This avoids triggering on e.g. random words in a comment. - if (!isLikelyToBeIdentifier(Word)) + // We don't want to handle words in string literals. It'd be nice to whitelist + // comments instead, but they're not retained in TokenBuffer. + if (Word.PartOfSpelledToken && + isStringLiteral(Word.PartOfSpelledToken->kind())) return {}; + const auto &SM = AST.getSourceManager(); // Look up the selected word in the index. FuzzyFindRequest Req; - Req.Query = Word.str(); + Req.Query = Word.Text.str(); Req.ProximityPaths = {MainFilePath}; - Req.Scopes = visibleNamespaces(Code.take_front(Pos), AST.getLangOpts()); + // Find the namespaces to query by lexing the file. + Req.Scopes = + visibleNamespaces(sourcePrefix(Word.Location, SM), AST.getLangOpts()); // FIXME: For extra strictness, consider AnyScope=false. Req.AnyScope = true; // We limit the results to 3 further below. This limit is to avoid fetching @@ -416,7 +369,7 @@ // This is to avoid too many false positives. // We could relax this in the future (e.g. to allow for typos) if we make // the query more accurate by other means. 
-    if (Sym.Name != Word)
+    if (Sym.Name != Word.Text)
       return;
 
     // Exclude constructor results. They have the same name as the class,
@@ -481,6 +434,82 @@
   return Results;
 }
 
+const syntax::Token *findNearbyIdentifier(const SpelledWord &Word,
+                                          const syntax::TokenBuffer &TB) {
+  // Don't use heuristics if this is a real identifier.
+  // Unlikely identifiers are OK if they were used as identifiers nearby.
+  if (Word.ExpandedToken)
+    return nullptr;
+  // We don't want to handle words in string literals. It'd be nice to whitelist
+  // comments instead, but they're not retained in TokenBuffer.
+  if (Word.PartOfSpelledToken &&
+      isStringLiteral(Word.PartOfSpelledToken->kind()))
+    return nullptr;
+
+  const SourceManager &SM = TB.sourceManager();
+  // We prefer the closest possible token, line-wise. Backwards is penalized.
+  // Ties are implicitly broken by traversal order (first-one-wins).
+  auto File = SM.getFileID(Word.Location);
+  unsigned WordLine = SM.getSpellingLineNumber(Word.Location);
+  auto Cost = [&](SourceLocation Loc) -> unsigned {
+    assert(SM.getFileID(Loc) == File && "spelled token in wrong file?");
+    unsigned Line = SM.getSpellingLineNumber(Loc);
+    if (Line > WordLine)
+      return 1 + llvm::Log2_64(Line - WordLine);
+    if (Line < WordLine)
+      return 2 + llvm::Log2_64(WordLine - Line);
+    return 0;
+  };
+  const syntax::Token *BestTok = nullptr;
+  // Search bounds are based on word length: 2^N lines forward.
+  unsigned BestCost = Word.Text.size() + 1;
+
+  // Updates BestTok and BestCost if Tok is a good candidate.
+  // Returns true if this token costs too much, telling the caller to stop.
+  auto Consider = [&](const syntax::Token &Tok) {
+    if (!(Tok.kind() == tok::identifier && Tok.text(SM) == Word.Text))
+      return false;
+    // No point guessing the same location we started with.
+    if (Tok.location() == Word.Location)
+      return false;
+    // We've done cheap checks, compute cost so we can break the caller's loop.
+    unsigned TokCost = Cost(Tok.location());
+    if (TokCost >= BestCost)
+      return true; // causes the outer loop to break.
+    // Allow locations that might be part of the AST, and macros (even if empty)
+    // but not things like disabled preprocessor sections.
+    if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok)))
+      return false;
+    // We already verified this token is an improvement.
+    BestCost = TokCost;
+    BestTok = &Tok;
+    return false;
+  };
+  auto SpelledTokens = TB.spelledTokens(File);
+  // Find the first token at or after the word (predicate: true, then false).
+  auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) {
+    assert(SM.getFileID(T.location()) == SM.getFileID(Word.Location));
+    return T.location() < Word.Location; // Comparison OK: same file.
+  });
+  // Search for matches after the cursor.
+  for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end()))
+    if (Consider(Tok))
+      break; // costs of later tokens are greater...
+  // Search for matches before the cursor.
+  for (const syntax::Token &Tok :
+       llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I)))
+    if (Consider(Tok))
+      break;
+
+  if (BestTok)
+    vlog(
+        "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}",
+        Word.Text, Word.Location.printToString(SM),
+        BestTok->location().printToString(SM));
+
+  return BestTok;
+}
+
 std::vector locateSymbolAt(ParsedAST &AST, Position Pos,
                                           const SymbolIndex *Index) {
   const auto &SM = AST.getSourceManager();
@@ -516,7 +545,28 @@
   if (!ASTResults.empty())
     return ASTResults;
 
-  return locateSymbolNamedTextuallyAt(AST, Index, *CurLoc, *MainFilePath);
+  // If the cursor can't be resolved directly, try fallback strategies.
+  auto Word =
+      SpelledWord::touching(*CurLoc, AST.getTokens(), AST.getLangOpts());
+  if (Word) {
+    // Is the same word nearby a real identifier that might refer to something?
+    if (const syntax::Token *NearbyIdent =
+            findNearbyIdentifier(*Word, AST.getTokens())) {
+      if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath))
+        return {*std::move(Macro)};
+      ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST,
+                                     *MainFilePath, Index);
+      if (!ASTResults.empty())
+        return ASTResults;
+    }
+    // No nearby word, or it didn't refer to anything either. Try the index.
+    auto TextualResults =
+        locateSymbolTextually(*Word, AST, Index, *MainFilePath);
+    if (!TextualResults.empty())
+      return TextualResults;
+  }
+
+  return {};
 }
 
 std::vector getDocumentLinks(ParsedAST &AST) {
diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
--- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp
@@ -12,6 +12,7 @@
 #include "TestTU.h"
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Format/Format.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/raw_os_ostream.h"
@@ -328,6 +329,101 @@
   EXPECT_EQ(ActualWords, ExpectedWords);
 }
 
+class SpelledWordsTest : public ::testing::Test {
+  llvm::Optional AST;
+
+  llvm::Optional tryWord(const char *Text) {
+    llvm::Annotations A(Text);
+    auto TU = TestTU::withCode(A.code());
+    AST = TU.build();
+    auto SW = SpelledWord::touching(
+        AST->getSourceManager().getComposedLoc(
+            AST->getSourceManager().getMainFileID(), A.point()),
+        AST->getTokens(), AST->getLangOpts());
+    if (A.ranges().size()) {
+      llvm::StringRef Want = A.code().slice(A.range().Begin, A.range().End);
+      EXPECT_EQ(Want, SW ? SW->Text : llvm::StringRef()) << Text;
+    }
+    return SW;
+  }
+
+protected:
+  SpelledWord word(const char *Text) {
+    auto Result = tryWord(Text);
+    EXPECT_TRUE(Result) << Text;
+    return Result.getValueOr(SpelledWord());
+  }
+
+  void noWord(const char *Text) { EXPECT_FALSE(tryWord(Text)) << Text; }
+};
+
+TEST_F(SpelledWordsTest, HeuristicBoundaries) { + word("// [[^foo]] "); + word("// [[f^oo]] "); + word("// [[foo^]] "); + word("// [[foo^]]+bar "); + noWord("//^ foo "); + noWord("// foo ^"); +} + +TEST_F(SpelledWordsTest, LikelyIdentifier) { + EXPECT_FALSE(word("// ^foo ").LikelyIdentifier); + EXPECT_TRUE(word("// [[^foo_bar]] ").LikelyIdentifier); + EXPECT_TRUE(word("// [[^fooBar]] ").LikelyIdentifier); + EXPECT_FALSE(word("// H^TTP ").LikelyIdentifier); + EXPECT_TRUE(word("// \\p [[^foo]] ").LikelyIdentifier); + EXPECT_TRUE(word("// @param[in] [[^foo]] ").LikelyIdentifier); + EXPECT_TRUE(word("// `[[f^oo]]` ").LikelyIdentifier); + EXPECT_TRUE(word("// bar::[[f^oo]] ").LikelyIdentifier); + EXPECT_TRUE(word("// [[f^oo]]::bar ").LikelyIdentifier); +} + +TEST_F(SpelledWordsTest, Comment) { + auto W = word("// [[^foo]]"); + EXPECT_FALSE(W.PartOfSpelledToken); + EXPECT_FALSE(W.SpelledToken); + EXPECT_FALSE(W.ExpandedToken); +} + +TEST_F(SpelledWordsTest, PartOfString) { + auto W = word(R"( auto str = "foo [[^bar]] baz"; )"); + ASSERT_TRUE(W.PartOfSpelledToken); + EXPECT_EQ(W.PartOfSpelledToken->kind(), tok::string_literal); + EXPECT_FALSE(W.SpelledToken); + EXPECT_FALSE(W.ExpandedToken); +} + +TEST_F(SpelledWordsTest, DisabledSection) { + auto W = word(R"cpp( + #if 0 + foo [[^bar]] baz + #endif + )cpp"); + ASSERT_TRUE(W.SpelledToken); + EXPECT_EQ(W.SpelledToken->kind(), tok::identifier); + EXPECT_EQ(W.SpelledToken, W.PartOfSpelledToken); + EXPECT_FALSE(W.ExpandedToken); +} + +TEST_F(SpelledWordsTest, Macros) { + auto W = word(R"cpp( + #define ID(X) X + ID(int [[^i]]); + )cpp"); + ASSERT_TRUE(W.SpelledToken); + EXPECT_EQ(W.SpelledToken->kind(), tok::identifier); + EXPECT_EQ(W.SpelledToken, W.PartOfSpelledToken); + ASSERT_TRUE(W.ExpandedToken); + EXPECT_EQ(W.ExpandedToken->kind(), tok::identifier); + + W = word(R"cpp( + #define OBJECT Expansion; + int [[^OBJECT]]; + )cpp"); + EXPECT_TRUE(W.SpelledToken); + EXPECT_FALSE(W.ExpandedToken) << "Expanded token is spelled 
differently"; +} + TEST(SourceCodeTests, VisibleNamespaces) { std::vector>> Cases = { { diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -685,10 +685,15 @@ auto AST = TU.build(); auto Index = TU.index(); - auto Results = locateSymbolNamedTextuallyAt( - AST, Index.get(), + auto Word = SpelledWord::touching( cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())), - testPath(TU.Filename)); + AST.getTokens(), AST.getLangOpts()); + if (!Word) { + ADD_FAILURE() << "No word touching point!" << Test; + continue; + } + auto Results = + locateSymbolTextually(*Word, AST, Index.get(), testPath(TU.Filename)); if (!WantDecl) { EXPECT_THAT(Results, IsEmpty()) << Test; @@ -788,10 +793,12 @@ auto TU = TestTU::withCode(T.code()); auto AST = TU.build(); auto Index = TU.index(); - auto Results = locateSymbolNamedTextuallyAt( - AST, Index.get(), + auto Word = SpelledWord::touching( cantFail(sourceLocationInMainFile(AST.getSourceManager(), T.point())), - testPath(TU.Filename)); + AST.getTokens(), AST.getLangOpts()); + ASSERT_TRUE(Word); + auto Results = + locateSymbolTextually(*Word, AST, Index.get(), testPath(TU.Filename)); EXPECT_THAT(Results, UnorderedElementsAre(Sym("uniqueMethodName", T.range("FooLoc")), Sym("uniqueMethodName", T.range("BarLoc")))); @@ -985,6 +992,101 @@ ElementsAre(Sym("foo", FooWithoutHeader.range()))); } +TEST(LocateSymbol, NearbyTokenSmoke) { + auto T = Annotations(R"cpp( + // prints e^rr and crashes + void die(const char* [[err]]); + )cpp"); + auto AST = TestTU::withCode(T.code()).build(); + // We don't pass an index, so can't hit index-based fallback. 
+ EXPECT_THAT(locateSymbolAt(AST, T.point()), + ElementsAre(Sym("err", T.range()))); +} + +TEST(LocateSymbol, NearbyIdentifier) { + const char *Tests[] = { + R"cpp( + // regular identifiers (won't trigger) + int hello; + int y = he^llo; + )cpp", + R"cpp( + // disabled preprocessor sections + int [[hello]]; + #if 0 + int y = ^hello; + #endif + )cpp", + R"cpp( + // comments + // he^llo, world + int [[hello]]; + )cpp", + R"cpp( + // not triggered by string literals + int hello; + const char* greeting = "h^ello, world"; + )cpp", + + R"cpp( + // can refer to macro invocations + #define INT int + [[INT]] x; + // I^NT + )cpp", + + R"cpp( + // can refer to macro invocations (even if they expand to nothing) + #define EMPTY + [[EMPTY]] int x; + // E^MPTY + )cpp", + + R"cpp( + // prefer nearest occurrence, backwards is worse than forwards + int hello; + int x = hello; + // h^ello + int y = [[hello]]; + int z = hello; + )cpp", + + R"cpp( + // short identifiers find near results + int [[hi]]; + // h^i + )cpp", + R"cpp( + // short identifiers don't find far results + int hi; + + + + // h^i + )cpp", + }; + for (const char *Test : Tests) { + Annotations T(Test); + auto AST = TestTU::withCode(T.code()).build(); + const auto &SM = AST.getSourceManager(); + llvm::Optional Nearby; + auto Word = + SpelledWord::touching(cantFail(sourceLocationInMainFile(SM, T.point())), + AST.getTokens(), AST.getLangOpts()); + if (!Word) { + ADD_FAILURE() << "No word at point! " << Test; + continue; + } + if (const auto *Tok = findNearbyIdentifier(*Word, AST.getTokens())) + Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange( + Tok->location(), Tok->endLocation())); + if (T.ranges().empty()) + EXPECT_THAT(Nearby, Eq(llvm::None)) << Test; + else + EXPECT_EQ(Nearby, T.range()) << Test; + } +} + TEST(FindReferences, WithinAST) { const char *Tests[] = { R"cpp(// Local variable