diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -237,6 +237,45 @@ return halfOpenToRange(SM, CharSourceRange::getCharRange(TokLoc, End)); } +namespace { + +enum TokenKind { Identifier, Operator, Whitespace, Other }; + +static bool isOverloadedOperator(const Token &Tok) { + switch (Tok.getKind()) { +#define OVERLOADED_OPERATOR(Name, Spelling, Token, Unary, Binary, MemOnly) \ + case tok::Token: +#define OVERLOADED_OPERATOR_MULTI(Name, Spelling, Unary, Binary, MemOnly) +#include "clang/Basic/OperatorKinds.def" + return true; + + default: + break; + } + return false; +} + +TokenKind getTokenKind(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + Token Tok; + Tok.setKind(tok::TokenKind::NUM_TOKENS); + if (Lexer::getRawToken(Loc, Tok, SM, LangOpts, + /*IgnoreWhiteSpace*/ false)) + return Other; + + // getRawToken will return false without setting Tok when the token is + // whitespace, so if the flag is not set, we are sure this is a whitespace. + if (Tok.is(tok::TokenKind::NUM_TOKENS)) + return Whitespace; + if (Tok.is(tok::TokenKind::raw_identifier)) + return Identifier; + if (isOverloadedOperator(Tok)) + return Operator; + return Other; +} + +} // namespace + SourceLocation getBeginningOfIdentifier(const Position &Pos, const SourceManager &SM, const LangOptions &LangOpts) { @@ -247,27 +286,57 @@ return SourceLocation(); } - // GetBeginningOfToken(pos) is almost what we want, but does the wrong thing - // if the cursor is at the end of the identifier. - // Instead, we lex at GetBeginningOfToken(pos - 1). The cases are: - // 1) at the beginning of an identifier, we'll be looking at something - // that isn't an identifier. - // 2) at the middle or end of an identifier, we get the identifier. - // 3) anywhere outside an identifier, we'll get some non-identifier thing. - // We can't actually distinguish cases 1 and 3, but returning the original - // location is correct for both! + // GetBeginningOfToken(InputLoc) is almost what we want, but does the wrong + // thing if the cursor is at the end of the token (identifier or operator). + // The cases are: + // 1) at the beginning of the token + // 2) at the middle of the token + // 3) at the end of the token + // 4) anywhere outside the identifier or operator + // To distinguish all cases, we lex both at the + // GetBeginningOfToken(InputLoc-1) and GetBeginningOfToken(InputLoc), for + // cases 1 and 4, we just return the original location. SourceLocation InputLoc = SM.getComposedLoc(FID, *Offset); - if (*Offset == 0) // Case 1 or 3. + if (*Offset == 0) // Case 1 or 4. return InputLoc; SourceLocation Before = SM.getComposedLoc(FID, *Offset - 1); + SourceLocation BeforeTokBeginning = + Lexer::GetBeginningOfToken(Before, SM, LangOpts); + TokenKind BeforeKind = getTokenKind(BeforeTokBeginning, SM, LangOpts); + + SourceLocation CurrentTokBeginning = + Lexer::GetBeginningOfToken(InputLoc, SM, LangOpts); + TokenKind CurrentKind = getTokenKind(CurrentTokBeginning, SM, LangOpts); + + // Case 2. + if (BeforeTokBeginning == CurrentTokBeginning) { + // For interesting token, we return the beginning of the token. + if (CurrentKind == Identifier || CurrentKind == Operator) + return CurrentTokBeginning; + // otherwise, we return the original loc. + return InputLoc; + } - Before = Lexer::GetBeginningOfToken(Before, SM, LangOpts); - Token Tok; - if (Before.isValid() && - !Lexer::getRawToken(Before, Tok, SM, LangOpts, false) && - Tok.is(tok::raw_identifier)) - return Before; // Case 2. - return InputLoc; // Case 1 or 3. + // Whitespace is not interesting. + if (BeforeKind == Whitespace) + return CurrentTokBeginning; + if (CurrentKind == Whitespace) + return BeforeTokBeginning; + + // Handle case 1 and 3. Note that the cursor could be at the token boundary, + // e.g. "Before^Current", we prefer identifiers to other tokens. + if (CurrentKind == Identifier) + return CurrentTokBeginning; + if (BeforeKind == Identifier) + return BeforeTokBeginning; + // Then prefer overloaded operators to other tokens. + if (CurrentKind == Operator) + return CurrentTokBeginning; + if (BeforeKind == Operator) + return BeforeTokBeginning; + + // Case 4, we just return the original location. + return InputLoc; } bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp --- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp +++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp @@ -319,14 +319,28 @@ Bar* bar; )cpp"; // First ^ is the expected beginning, last is the search position. - for (std::string Text : std::vector{ + for (const std::string &Text : std::vector{ "int ^f^oo();", // inside identifier "int ^foo();", // beginning of identifier "int ^foo^();", // end of identifier "int foo(^);", // non-identifier "^int foo();", // beginning of file (can't back up) "int ^f0^0();", // after a digit (lexing at N-1 is wrong) - "int ^λλ^λ();", // UTF-8 handled properly when backing up + "/^/ comments", // non-interesting token + "void f(int abc) { abc ^ ++; }", // whitespace + "void f(int abc) { ^abc^++; }", // range of identifier + "void f(int abc) { ++^abc^; }", // range of identifier + "void f(int abc) { ^+^+abc; }", // range of operator + "void f(int abc) { ^abc^ ++; }", // range of identifier + "void f(int abc) { abc ^++^; }", // range of operator + "void f(int abc) { ^++^ abc; }", // range of operator + "void f(int abc) { ++ ^abc^; }", // range of identifier + "void f(int abc) { ^++^/**/abc; }", // range of operator + "void f(int abc) { ++/**/^abc; }", // range of identifier + "void f(int abc) { ^abc^/**/++; }", // range of identifier + "void f(int abc) { abc/**/^++; }", // range of operator + "void f() {^ }", // outside of identifier and operator + "int ^λλ^λ();", // UTF-8 handled properly when backing up // identifier in macro arg "MACRO(bar->^func())", // beginning of identifier diff --git a/clang-tools-extra/clangd/unittests/XRefsTests.cpp b/clang-tools-extra/clangd/unittests/XRefsTests.cpp --- a/clang-tools-extra/clangd/unittests/XRefsTests.cpp +++ b/clang-tools-extra/clangd/unittests/XRefsTests.cpp @@ -441,6 +441,15 @@ auto x = m^akeX(); } )cpp", + + R"cpp( + struct X { + X& [[operator]]++() {} + }; + void foo(X& x) { + +^+x; + } + )cpp", }; for (const char *Test : Tests) { Annotations T(Test);