Index: clang-tools-extra/trunk/clangd/SourceCode.h =================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.h +++ clang-tools-extra/trunk/clangd/SourceCode.h @@ -20,8 +20,9 @@ #include "clang/Basic/SourceManager.h" #include "clang/Format/Format.h" #include "clang/Tooling/Core/Replacement.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Support/SHA1.h" namespace clang { class SourceManager; Index: clang-tools-extra/trunk/clangd/SourceCode.cpp =================================================================== --- clang-tools-extra/trunk/clangd/SourceCode.cpp +++ clang-tools-extra/trunk/clangd/SourceCode.cpp @@ -12,6 +12,8 @@ #include "Logger.h" #include "Protocol.h" #include "clang/AST/ASTContext.h" +#include "clang/Basic/LangOptions.h" +#include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/TokenKinds.h" #include "clang/Format/Format.h" @@ -244,20 +246,106 @@ return L == R.getEnd() || halfOpenRangeContains(Mgr, R, L); } -llvm::Optional toHalfOpenFileRange(const SourceManager &Mgr, +static unsigned getTokenLengthAtLoc(SourceLocation Loc, const SourceManager &SM, + const LangOptions &LangOpts) { + Token TheTok; + if (Lexer::getRawToken(Loc, TheTok, SM, LangOpts)) + return 0; + // FIXME: Here we check whether the token at the location is a greatergreater + // (>>) token and consider it as a single greater (>). This is to get it + // working for templates but it isn't correct for the right shift operator. We + // can avoid this by using half open char ranges in getFileRange() but getting + // token ending is not well supported in macroIDs. + if (TheTok.is(tok::greatergreater)) + return 1; + return TheTok.getLength(); +} + +// Returns location of the last character of the token at a given loc +static SourceLocation getLocForTokenEnd(SourceLocation BeginLoc, + const SourceManager &SM, + const LangOptions &LangOpts) { + unsigned Len = getTokenLengthAtLoc(BeginLoc, SM, LangOpts); + return BeginLoc.getLocWithOffset(Len ? Len - 1 : 0); +} + +// Returns location of the starting of the token at a given EndLoc +static SourceLocation getLocForTokenBegin(SourceLocation EndLoc, + const SourceManager &SM, + const LangOptions &LangOpts) { + return EndLoc.getLocWithOffset( + -(signed)getTokenLengthAtLoc(EndLoc, SM, LangOpts)); +} + +// Converts a char source range to a token range. +static SourceRange toTokenRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts) { + if (!Range.isTokenRange()) + Range.setEnd(getLocForTokenBegin(Range.getEnd(), SM, LangOpts)); + return Range.getAsRange(); +} +// Returns the union of two token ranges. +// To find the maximum of the Ends of the ranges, we compare the location of the +// last character of the token. +static SourceRange unionTokenRange(SourceRange R1, SourceRange R2, + const SourceManager &SM, + const LangOptions &LangOpts) { + SourceLocation E1 = getLocForTokenEnd(R1.getEnd(), SM, LangOpts); + SourceLocation E2 = getLocForTokenEnd(R2.getEnd(), SM, LangOpts); + return SourceRange(std::min(R1.getBegin(), R2.getBegin()), + E1 < E2 ? R2.getEnd() : R1.getEnd()); +} + +// Returns the tokenFileRange for a given Location as a Token Range +// This is quite similar to getFileLoc in SourceManager as both use +// getImmediateExpansionRange and getImmediateSpellingLoc (for macro IDs). +// However: +// - We want to maintain the full range information as we move from one file to +// the next. getFileLoc only uses the BeginLoc of getImmediateExpansionRange. +// - We want to split '>>' tokens as the lexer parses the '>>' in template +// instantiations as a '>>' instead of a '>'. +// There is also getExpansionRange but it simply calls +// getImmediateExpansionRange on the begin and ends separately which is wrong. +static SourceRange getTokenFileRange(SourceLocation Loc, + const SourceManager &SM, + const LangOptions &LangOpts) { + SourceRange FileRange = Loc; + while (!FileRange.getBegin().isFileID()) { + assert(!FileRange.getEnd().isFileID() && + "Both Begin and End should be MacroIDs."); + if (SM.isMacroArgExpansion(FileRange.getBegin())) { + FileRange.setBegin(SM.getImmediateSpellingLoc(FileRange.getBegin())); + FileRange.setEnd(SM.getImmediateSpellingLoc(FileRange.getEnd())); + } else { + SourceRange ExpansionRangeForBegin = toTokenRange( + SM.getImmediateExpansionRange(FileRange.getBegin()), SM, LangOpts); + SourceRange ExpansionRangeForEnd = toTokenRange( + SM.getImmediateExpansionRange(FileRange.getEnd()), SM, LangOpts); + FileRange = unionTokenRange(ExpansionRangeForBegin, ExpansionRangeForEnd, + SM, LangOpts); + } + } + return FileRange; +} + +llvm::Optional toHalfOpenFileRange(const SourceManager &SM, const LangOptions &LangOpts, SourceRange R) { - auto Begin = Mgr.getFileLoc(R.getBegin()); - if (Begin.isInvalid()) + SourceRange R1 = getTokenFileRange(R.getBegin(), SM, LangOpts); + if (!isValidFileRange(SM, R1)) return llvm::None; - auto End = Mgr.getFileLoc(R.getEnd()); - if (End.isInvalid()) + + SourceRange R2 = getTokenFileRange(R.getEnd(), SM, LangOpts); + if (!isValidFileRange(SM, R2)) return llvm::None; - End = Lexer::getLocForEndOfToken(End, 0, Mgr, LangOpts); - SourceRange Result(Begin, End); - if (!isValidFileRange(Mgr, Result)) + SourceRange Result = unionTokenRange(R1, R2, SM, LangOpts); + unsigned TokLen = getTokenLengthAtLoc(Result.getEnd(), SM, LangOpts); + // Convert from closed token range to half-open (char) range + Result.setEnd(Result.getEnd().getLocWithOffset(TokLen)); + if (!isValidFileRange(SM, Result)) return llvm::None; + return Result; } @@ -611,7 +699,6 @@ Found.push_back(Used.getKey()); } - llvm::sort(Found, [&](const std::string &LHS, const std::string &RHS) { if (Current == RHS) return false; Index: clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp =================================================================== --- clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp +++ clang-tools-extra/trunk/clangd/unittests/SourceCodeTests.cpp @@ -10,6 +10,7 @@ #include "Protocol.h" #include "SourceCode.h" #include "TestTU.h" +#include "clang/Basic/LangOptions.h" #include "clang/Format/Format.h" #include "llvm/Support/Error.h" #include "llvm/Support/raw_os_ostream.h" @@ -421,6 +422,51 @@ EXPECT_THAT(*Result, MacroName("MACRO")); } +// Test for functions toHalfOpenFileRange and getHalfOpenFileRange +TEST(SourceCodeTests, HalfOpenFileRange) { + // Each marked range should be the file range of the decl with the same name + // and each name should be unique. + Annotations Test(R"cpp( + #define FOO(X, Y) int Y = ++X + #define BAR(X) X + 1 + #define ECHO(X) X + template + class P {}; + void f() { + $a[[P>>>> a]]; + $b[[int b = 1]]; + $c[[FOO(b, c)]]; + $d[[FOO(BAR(BAR(b)), d)]]; + // FIXME: We might want to select everything inside the outer ECHO. + ECHO(ECHO($e[[int) ECHO(e]])); + } + )cpp"); + + ParsedAST AST = TestTU::withCode(Test.code()).build(); + llvm::errs() << Test.code(); + const SourceManager &SM = AST.getSourceManager(); + const LangOptions &LangOpts = AST.getASTContext().getLangOpts(); + // Turn a SourceLocation into a pair of positions + auto SourceRangeToRange = [&SM](SourceRange SrcRange) { + return Range{sourceLocToPosition(SM, SrcRange.getBegin()), + sourceLocToPosition(SM, SrcRange.getEnd())}; + }; + auto CheckRange = [&](llvm::StringRef Name) { + const NamedDecl &Decl = findUnqualifiedDecl(AST, Name); + auto FileRange = toHalfOpenFileRange(SM, LangOpts, Decl.getSourceRange()); + SCOPED_TRACE("Checking range: " + Name); + ASSERT_NE(FileRange, llvm::None); + Range HalfOpenRange = SourceRangeToRange(*FileRange); + EXPECT_EQ(HalfOpenRange, Test.ranges(Name)[0]); + }; + + CheckRange("a"); + CheckRange("b"); + CheckRange("c"); + CheckRange("d"); + CheckRange("e"); +} + } // namespace } // namespace clangd } // namespace clang