diff --git a/clang-tools-extra/clangd/Selection.h b/clang-tools-extra/clangd/Selection.h --- a/clang-tools-extra/clangd/Selection.h +++ b/clang-tools-extra/clangd/Selection.h @@ -76,7 +76,7 @@ unsigned Start, unsigned End); // Describes to what extent an AST node is covered by the selection. - enum Selection { + enum Selection : unsigned char { // The AST node owns no characters covered by the selection. // Note that characters owned by children don't count: // if (x == 0) scream(); diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -34,6 +34,24 @@ using Node = SelectionTree::Node; using ast_type_traits::DynTypedNode; +// Sentinel value for the selectedness of a node where we've seen no tokens yet. +// Once we see a token, treated as fully-selected (so far). If we never see a +// token, it folds into Unselected. These are never exposed publicly, +constexpr SelectionTree::Selection NoTokens = + static_cast( + static_cast(SelectionTree::Complete + 1)); + +// Nodes start start with NoTokens, and then use this function to aggregate +// the selectedness as more tokens are found. +void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { + assert(New != NoTokens); + if (Result == NoTokens) + Result = New; + else if (Result != New) + // Can only be completely selected (or unselected) if all tokens are. + Result = SelectionTree::Partial; +} + // Identifies which tokens are selected, and evaluates claims of source ranges // by AST nodes. Tokens may be claimed only once: first-come, first-served. class SelectedTokens { @@ -102,13 +120,20 @@ } } - // If some tokens were previously claimed (Result != Unselected), we may - // upgrade from Partial->Complete, even if no new tokens were claimed. - // Important for [[int a]]. - if (ClaimedAnyToken || Result) { - Result = std::max(Result, PartialSelection ? SelectionTree::Partial - : SelectionTree::Complete); - } + if (ClaimedAnyToken) + update(Result, PartialSelection ? SelectionTree::Partial + : SelectionTree::Complete); + } + + // Checks whether the token at Offset is selected, and updates Result. + // Does not claim the token. + void peek(unsigned Offset, SelectionTree::Selection &Result) const { + // Find the token, if it exists. + auto I = llvm::partition_point( + Tokens, [&](const TokInfo &Tok) { return Tok.EndOffset <= Offset; }); + if (I == Tokens.end() || I->StartOffset != Offset) + return; + update(Result, I->Selected); } private: @@ -195,16 +220,6 @@ V.TraverseAST(AST); assert(V.Stack.size() == 1 && "Unpaired push/pop?"); assert(V.Stack.top() == &V.Nodes.front()); - // We selected TUDecl if tokens were unclaimed (or the file is empty). - SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected; - V.Claimed.claim(Begin, End, UnclaimedTokens); - if (UnclaimedTokens || V.Nodes.size() == 1) { - StringRef FileContent = AST.getSourceManager().getBufferData(File); - // Don't require the trailing newlines to be selected. - bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size(); - V.Stack.top()->Selected = - SelectedAll ? SelectionTree::Complete : SelectionTree::Partial; - } return std::move(V.Nodes); } @@ -289,6 +304,7 @@ #ifndef NDEBUG PrintPolicy(PP), #endif + Tokens(Tokens), Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd), SelFile(SelFile), SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken( @@ -377,11 +393,9 @@ Nodes.emplace_back(); Nodes.back().ASTNode = std::move(Node); Nodes.back().Parent = Stack.top(); + Nodes.back().Selected = NoTokens; Stack.push(&Nodes.back()); claimRange(Early, Nodes.back().Selected); - // Early hit detection never selects the whole node. - if (Nodes.back().Selected) - Nodes.back().Selected = SelectionTree::Partial; } // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). @@ -390,6 +404,8 @@ Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); claimRange(N.ASTNode.getSourceRange(), N.Selected); + if (N.Selected == NoTokens) + N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { // Attach to the tree. N.Parent->Children.push_back(&N); @@ -426,29 +442,75 @@ void claimRange(SourceRange S, SelectionTree::Selection &Result) { if (!S.isValid()) return; - // toHalfOpenFileRange() allows selection of constructs in macro args. e.g: - // #define LOOP_FOREVER(Body) for(;;) { Body } - // void IncrementLots(int &x) { - // LOOP_FOREVER( ++x; ) - // } - // Selecting "++x" or "x" will do the right thing. - auto Range = toHalfOpenFileRange(SM, LangOpts, S); - assert(Range && "We should be able to get the File Range"); - dlog("{1}claimRange: {0}", Range->printToString(SM), indent()); - auto B = SM.getDecomposedLoc(Range->getBegin()); - auto E = SM.getDecomposedLoc(Range->getEnd()); - // Otherwise, nodes in macro expansions can't be selected. - if (B.first != SelFile || E.first != SelFile) - return; - // Attempt to claim the remaining range. If there's nothing to claim, only - // children were selected. - Claimed.claim(B.second, E.second, Result); + + // We need to iterate over all the expanded tokens that are part of S. + // Consider VarDecl created by the macro expansion FLAG(x) -> int x = 0; + // Neither S.getBegin() nor S.getEnd() are arg expansions, but x is. + // The selection FLAG([[x]]) must partially select the VarDecl. + llvm::ArrayRef Remaining = Tokens.expandedTokens(S); + while (!Remaining.empty()) { + // Take consecutive tokens from the same context together for efficiency. + FileID FID = SM.getFileID(Remaining.front().location()); + auto Batch = Remaining.take_while([&](const syntax::Token &T) { + return SM.getFileID(T.location()) == FID; + }); + assert(!Batch.empty()); + Remaining = Remaining.drop_front(Batch.size()); + + // There are several possible categories of FileID depending on how the + // preprocessor was used to generate these tokens: + // main file, #included file, macro args, macro bodies. + // We need to identify the main-file tokens that represent Batch, and + // determine whether we want to exclusively claim them. Regular tokens + // represent one AST construct, but a macro invocation can represent many. + + // Handle tokens written directly in the main file. + // Claim the token exclusively for this node. + if (FID == SelFile) { + Claimed.claim(SM.getFileOffset(Batch.front().location()), + SM.getFileOffset(Batch.back().location()) + + Batch.back().length(), + Result); + continue; + } + + // Handle tokens in another file #included into the main file. + // Check if the #include is selected, but don't claim it exclusively. + if (Batch.front().location().isFileID()) { + for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); + Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { + if (SM.getFileID(Loc) == SelFile) { + Claimed.peek(SM.getFileOffset(Loc), Result); + break; + } + } + continue; + } + + // (We're definitely in a macro expansion at this point.) + + SourceLocation ArgStart = + SM.getTopMacroCallerLoc(Batch.front().location()); + // Handle tokens that were passed as a macro argument. + // Claim the token exclusively for this node. + // FIXME: this prevents selecting both occurrences of args used twice. + if (SM.getFileID(ArgStart) == SelFile) { + SourceLocation ArgEnd = + SM.getTopMacroCallerLoc(Batch.back().location()); + Claimed.claim(SM.getFileOffset(ArgStart), + SM.getFileOffset(ArgEnd) + Batch.back().length(), Result); + continue; + } + + // Handle tokens produced by non-argument macro expansion. + // Check if the macro name is selected, don't claim it exclusively. + auto Expansion = SM.getDecomposedExpansionLoc(S.getBegin()); + if (Expansion.first == SelFile) + // FIXME: also check ( and ) for function-like macros. + Claimed.peek(Expansion.second, Result); + } if (Result) - dlog("{1}hit selection: {0}", - SourceRange(SM.getComposedLoc(B.first, B.second), - SM.getComposedLoc(E.first, E.second)) - .printToString(SM), - indent()); + dlog("{1}hit selection: {0}", S.printToString(SM), indent()); } std::string indent(int Offset = 0) { @@ -464,6 +526,7 @@ const PrintingPolicy &PrintPolicy; #endif std::stack Stack; + const syntax::TokenBuffer &Tokens; SelectedTokens Claimed; std::deque Nodes; // Stable pointers as we add more nodes. FileID SelFile; diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -134,6 +134,15 @@ )cpp", "IfStmt", }, + { + R"cpp( + int x(int); + #define M(foo) x(foo) + int a = 42; + int b = M([[^a]]); + )cpp", + "DeclRefExpr", + }, { R"cpp( void foo(); @@ -386,10 +395,10 @@ void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {} )cpp", R"cpp(int a = [[5 >^> 1]];)cpp", - R"cpp([[ + R"cpp( #define ECHO(X) X - ECHO(EC^HO([[$C[[int]]) EC^HO(a]])); - ]])cpp", + ECHO(EC^HO($C[[int]]) EC^HO(a)); + )cpp", R"cpp( $C[[^$C[[int]] a^]]; )cpp", R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp", }; @@ -426,6 +435,20 @@ EXPECT_EQ("BreakStmt", T.commonAncestor()->kind()); EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind()); + + Case = R"cpp( +#define MACRO while(1) + void test() { +[[#include "Expand.inc"]] + br^eak; + } + )cpp"; + Test = Annotations(Case); + TU = TestTU::withCode(Test.code()); + AST = TU.build(); + T = makeSelectionTree(Case, AST); + + EXPECT_EQ("WhileStmt", T.commonAncestor()->kind()); } TEST(SelectionTest, Implicit) { diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -269,7 +269,7 @@ EXPECT_UNAVAILABLE(UnavailableCases); // vector of pairs of input and output strings - const std::vector> + const std::vector> InputOutputs = { // extraction from variable declaration/assignment {R"cpp(void varDecl() { @@ -321,17 +321,10 @@ if(1) LOOP(5 + [[3]]) })cpp", - /*FIXME: It should be extracted like this. SelectionTree needs to be - * fixed for macros. R"cpp(#define LOOP(x) while (1) {a = x;} - void f(int a) { - auto dummy = 3; if(1) - LOOP(5 + dummy) - })cpp"},*/ - R"cpp(#define LOOP(x) while (1) {a = x;} void f(int a) { - auto dummy = LOOP(5 + 3); if(1) - dummy + auto dummy = 3; if(1) + LOOP(5 + dummy) })cpp"}, {R"cpp(#define LOOP(x) do {x;} while(1); void f(int a) { @@ -644,13 +637,18 @@ )cpp"; EXPECT_EQ(apply(TemplateFailInput), "unavailable"); - // FIXME: This should be extractable after selectionTree works correctly for - // macros (currently it doesn't select anything for the following case) - std::string MacroFailInput = R"cpp( + std::string MacroInput = R"cpp( #define F(BODY) void f() { BODY } F ([[int x = 0;]]) )cpp"; - EXPECT_EQ(apply(MacroFailInput), "unavailable"); + std::string MacroOutput = R"cpp( + #define F(BODY) void f() { BODY } + void extracted() { +int x = 0; +} +F (extracted();) + )cpp"; + EXPECT_EQ(apply(MacroInput), MacroOutput); // Shouldn't crash. EXPECT_EQ(apply("void f([[int a]]);"), "unavailable"); diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -175,6 +175,7 @@ /// All tokens produced by the preprocessor after all macro replacements, /// directives, etc. Source locations found in the clang AST will always /// point to one of these tokens. + /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()). /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split /// into two '>' tokens by the parser. However, TokenBuffer currently /// keeps it as a single '>>' token. @@ -182,6 +183,10 @@ return ExpandedTokens; } + /// Returns the subrange of expandedTokens() corresponding to the closed + /// token range R. + llvm::ArrayRef expandedTokens(SourceRange R) const; + /// Find the subrange of spelled tokens that produced the corresponding \p /// Expanded tokens. /// diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -119,6 +119,22 @@ return Text.substr(Begin, length()); } +llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { + if (R.isInvalid()) + return {}; + const Token *Begin = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const Token *End = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Annotations.h" #include "llvm/Testing/Support/SupportHelpers.h" +#include "gmock/gmock.h" #include #include #include @@ -663,6 +664,20 @@ ValueIs(SameRange(findSpelled("not_mapped")))); } +TEST_F(TokenBufferTest, ExpandedTokensForRange) { + recordTokens(R"cpp( + #define SIGN(X) X##_washere + A SIGN(B) C SIGN(D) E SIGN(F) G + )cpp"); + + SourceRange R(findExpanded("C").front().location(), + findExpanded("F_washere").front().location()); + // Sanity check: expanded and spelled tokens are stored separately. + EXPECT_THAT(Buffer.expandedTokens(R), + SameRange(findExpanded("C D_washere E F_washere"))); + EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); +} + TEST_F(TokenBufferTest, ExpansionStartingAt) { // Object-like macro expansions. recordTokens(R"cpp(