diff --git a/clang-tools-extra/clangd/XRefs.h b/clang-tools-extra/clangd/XRefs.h --- a/clang-tools-extra/clangd/XRefs.h +++ b/clang-tools-extra/clangd/XRefs.h @@ -26,6 +26,10 @@ #include namespace clang { +namespace syntax { +class Token; +class TokenBuffer; +} // namespace syntax namespace clangd { class ParsedAST; @@ -49,6 +53,13 @@ std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index = nullptr); +// If SpellingLoc points at a "word" that does not correspond to an expanded +// token (e.g. in a comment, a string, or a PP disabled region), then try to +// find a close occurrence of that word that does. +// (This is for internal use by locateSymbolAt, and is exposed for testing). +const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc, + const syntax::TokenBuffer &TB); + /// Get all document links std::vector getDocumentLinks(ParsedAST &AST); diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -28,6 +28,7 @@ #include "clang/AST/DeclTemplate.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/Type.h" +#include "clang/Basic/CharInfo.h" #include "clang/Basic/LLVM.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" @@ -44,6 +45,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" @@ -313,6 +315,100 @@ return Result; } +static bool tokenSpelledAt(SourceLocation SpellingLoc, + const syntax::TokenBuffer &TB) { + auto ExpandedTokens = TB.expandedTokens( + TB.sourceManager().getMacroArgExpandedLocation(SpellingLoc)); + return !ExpandedTokens.empty(); +} + +static llvm::StringRef wordTouching(llvm::StringRef Code, unsigned Offset) { + unsigned B = Offset, E = Offset; + while (B > 0 && isIdentifierBody(Code[B - 1])) + --B; + while (E < Code.size() && isIdentifierBody(Code[E])) + ++E; + return Code.slice(B, E); +} + +const syntax::Token *findNearbyIdentifier(SourceLocation SpellingLoc, + const syntax::TokenBuffer &TB) { + const SourceManager &SM = TB.sourceManager(); + auto Pos = SM.getDecomposedLoc(SpellingLoc); + llvm::StringRef Code = SM.getBufferData(Pos.first); + llvm::StringRef Word = wordTouching(Code, Pos.second); + if (Word.empty()) + return nullptr; + unsigned WordOffset = Word.data() - Code.data(); + SourceLocation WordStart = SM.getComposedLoc(Pos.first, WordOffset); + // If this is a real token that survived preprocessing, don't use heuristics. + auto WordExpandedTokens = + TB.expandedTokens(SM.getMacroArgExpandedLocation(WordStart)); + if (!WordExpandedTokens.empty()) + return nullptr; + + // We prefer the closest possible token, line-wise. Backwards is penalized. + // Ties are implicitly broken by traversal order (first-one-wins). + unsigned WordLine = SM.getLineNumber(Pos.first, WordOffset); + auto Cost = [&](SourceLocation Loc) -> unsigned { + assert(SM.getFileID(Loc) == Pos.first && "spelled token in wrong file?"); + unsigned Line = SM.getLineNumber(Pos.first, SM.getFileOffset(Loc)); + if (Line > WordLine) + return 1 + llvm::Log2_64(Line - WordLine); + if (Line < WordLine) + return 2 + llvm::Log2_64(WordLine - Line); + return 0; + }; + const syntax::Token *BestTok = nullptr; + // Search bounds are based on word length: 2^N lines forward. + unsigned BestCost = Word.size() + 1; + + // Updates BestTok and BestCost if Tok is a good candidate. + // May return true if the cost is too high for this token. + auto Consider = [&](const syntax::Token &Tok) { + if(!(Tok.kind() == tok::identifier && Tok.text(SM) == Word)) + return false; + // No point guessing the same location we started with. + if (Tok.location() == WordStart) + return false; + // We've done cheap checks, compute cost so we can break the caller's loop. + unsigned TokCost = Cost(Tok.location()); + if (TokCost >= BestCost) + return true; // causes the outer loop to break. + // Allow locations that might be part of the AST, and macros (even if empty) + // but not things like disabled preprocessor sections. + if (!(tokenSpelledAt(Tok.location(), TB) || TB.expansionStartingAt(&Tok))) + return false; + // We already verified this token is an improvement. + BestCost = TokCost; + BestTok = &Tok; + return false; + }; + auto SpelledTokens = TB.spelledTokens(Pos.first); + // Find where the word occurred in the token stream, to search forward & back. + auto *I = llvm::partition_point(SpelledTokens, [&](const syntax::Token &T) { + assert(SM.getFileID(T.location()) == SM.getFileID(WordStart)); + return T.location() >= WordStart; // Comparison OK: same file. + }); + // Search for matches after the cursor. + for (const syntax::Token &Tok : llvm::makeArrayRef(I, SpelledTokens.end())) + if (Consider(Tok)) + break; // costs of later tokens are greater... + // Search for matches before the cursor. + for (const syntax::Token &Tok : + llvm::reverse(llvm::makeArrayRef(SpelledTokens.begin(), I))) + if (Consider(Tok)) + break; + + if (BestTok) + vlog( + "Word {0} under cursor {1} isn't a token (after PP), trying nearby {2}", + Word, WordStart.printToString(SM), + BestTok->location().printToString(SM)); + + return BestTok; +} + std::vector locateSymbolAt(ParsedAST &AST, Position Pos, const SymbolIndex *Index) { const auto &SM = AST.getSourceManager(); @@ -343,8 +439,22 @@ // expansion.) return {*std::move(Macro)}; - return locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, - Index); + auto ASTResults = + locateASTReferent(*CurLoc, TouchedIdentifier, AST, *MainFilePath, Index); + if (!ASTResults.empty()) + return ASTResults; + + if (const syntax::Token *NearbyIdent = + findNearbyIdentifier(*CurLoc, AST.getTokens())) { + if (auto Macro = locateMacroReferent(*NearbyIdent, AST, *MainFilePath)) + return {*std::move(Macro)}; + ASTResults = locateASTReferent(NearbyIdent->location(), NearbyIdent, AST, + *MainFilePath, Index); + if (!ASTResults.empty()) + return ASTResults; + } + + return {}; } std::vector getDocumentLinks(ParsedAST &AST) { diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -38,6 +38,7 @@ namespace { using ::testing::ElementsAre; +using ::testing::Eq; using ::testing::IsEmpty; using ::testing::Matcher; using ::testing::UnorderedElementsAreArray; @@ -342,7 +343,7 @@ R"cpp(// Symbol concatenated inside macro (not supported) int *pi; #define POINTER(X) p ## X; - int i = *POINTER(^i); + int x = *POINTER(^i); )cpp", R"cpp(// Forward class declaration @@ -855,6 +856,88 @@ ElementsAre(Sym("foo", FooWithoutHeader.range()))); } +TEST(LocateSymbol, NearbyTokenSmoke) { + auto T = Annotations(R"cpp( + // prints e^rr and crashes + void die(const char* [[err]]); + )cpp"); + auto AST = TestTU::withCode(T.code()).build(); + // We don't pass an index, so can't hit index-based fallback. + EXPECT_THAT(locateSymbolAt(AST, T.point()), + ElementsAre(Sym("err", T.range()))); +} + +TEST(LocateSymbol, NearbyIdentifier) { + const char *Tests[] = { + R"cpp( + // regular identifiers (won't trigger) + int hello; + int y = he^llo; + )cpp", + R"cpp( + // disabled preprocessor sections + int [[hello]]; + #if 0 + int y = ^hello; + #endif + )cpp", + R"cpp( + // comments + // he^llo, world + int [[hello]]; + )cpp", + R"cpp( + // string literals + int [[hello]]; + const char* greeting = "h^ello, world"; + )cpp", + + R"cpp( + // can refer to macro invocations (even if they expand to nothing) + #define INT int + [[INT]] x; + // I^NT + )cpp", + + R"cpp( + // prefer nearest occurrence + int hello; + int x = hello; + // h^ello + int y = [[hello]]; + int z = hello; + )cpp", + + R"cpp( + // short identifiers find near results + int [[hi]]; + // h^i + )cpp", + R"cpp( + // short identifiers don't find far results + int hi; + + + + // h^i + )cpp", + }; + for (const char* Test : Tests) { + Annotations T(Test); + auto AST = TestTU::withCode(T.code()).build(); + const auto &SM = AST.getSourceManager(); + llvm::Optional Nearby; + if (const auto*Tok = findNearbyIdentifier( + cantFail(sourceLocationInMainFile(SM, T.point())), AST.getTokens())) + Nearby = halfOpenToRange(SM, CharSourceRange::getCharRange( + Tok->location(), Tok->endLocation())); + if (T.ranges().empty()) + EXPECT_THAT(Nearby, Eq(llvm::None)) << Test; + else + EXPECT_THAT(Nearby, T.range()) << Test; + } +} + TEST(FindReferences, WithinAST) { const char *Tests[] = { R"cpp(// Local variable