diff --git a/clang-tools-extra/clangd/Selection.h b/clang-tools-extra/clangd/Selection.h --- a/clang-tools-extra/clangd/Selection.h +++ b/clang-tools-extra/clangd/Selection.h @@ -76,7 +76,7 @@ unsigned Start, unsigned End); // Describes to what extent an AST node is covered by the selection. - enum Selection { + enum Selection : unsigned char { // The AST node owns no characters covered by the selection. // Note that characters owned by children don't count: // if (x == 0) scream(); diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp --- a/clang-tools-extra/clangd/Selection.cpp +++ b/clang-tools-extra/clangd/Selection.cpp @@ -34,95 +34,283 @@ using Node = SelectionTree::Node; using ast_type_traits::DynTypedNode; -// Identifies which tokens are selected, and evaluates claims of source ranges -// by AST nodes. Tokens may be claimed only once: first-come, first-served. -class SelectedTokens { +// An IntervalSet maintains a set of disjoint subranges of an array. +// +// Initially, it contains the entire array. +// [-----------------------------------------------------------] +// +// When a range is erased(), it will typically split the array in two. +// Claim: [--------------------] +// after: [----------------] [-------------------] +// +// erase() returns the segments actually erased. Given the state above: +// Claim: [---------------------------------------] +// Out: [---------] [------] +// After: [-----] [-----------] +// +// It is used to track (expanded) tokens not yet associated with an AST node. +// On traversing an AST node, its token range is erased from the unclaimed set. +// The tokens actually removed are associated with that node, and hit-tested +// against the selection to determine whether the node is selected. +template +class IntervalSet { +public: + IntervalSet(llvm::ArrayRef Range) : UnclaimedRanges(&rangeLess) { + UnclaimedRanges.insert(Range); + } + + // Removes the elements of Claim from the set, modifying or removing ranges + // that overlap it. + // Returns the continuous subranges of Claim that were actually removed. + llvm::SmallVector, 4> erase(llvm::ArrayRef Claim) { + llvm::SmallVector, 4> Out; + if (Claim.empty()) + return Out; + // equal_range finds overlapping ranges, because of how we chose <. + auto Overlap = UnclaimedRanges.equal_range(Claim); + if (Overlap.first == Overlap.second) + return Out; + + // General case: + // Claim: [-----------------] + // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-] + // Overlap: ^first ^second + // Ranges C and D are fully included. Ranges B and E must be trimmed. + + // First, copy all overlapping ranges into the output. + auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second); + // If any of the overlapping ranges were sliced by the claim, split them: + // - restrict the returned range to the claimed part + // - save the unclaimed part so it can be reinserted + llvm::ArrayRef RemainingHead, RemainingTail; + if (Claim.begin() > OutFirst->begin()) { + RemainingHead = {OutFirst->begin(), Claim.begin()}; + *OutFirst = {Claim.begin(), OutFirst->end()}; + } + if (Claim.end() < Out.back().end()) { + RemainingTail = {Claim.end(), Out.back().end()}; + Out.back() = {Out.back().begin(), Claim.end()}; + } + + // Erase all the overlapping ranges (invalidating all iterators). + UnclaimedRanges.erase(Overlap.first, Overlap.second); + // Reinsert ranges that were merely trimmed. + if (!RemainingHead.empty()) + UnclaimedRanges.insert(RemainingHead); + if (!RemainingTail.empty()) + UnclaimedRanges.insert(RemainingTail); + + return Out; + } + +private: + using TokenRange = llvm::ArrayRef; + // Given that the ranges we insert are disjoint, there are several ways to + // legally define range < range. + // We choose to define it so overlapping ranges compare equal. + static bool rangeLess(llvm::ArrayRef L, llvm::ArrayRef R) { + return L.end() <= R.begin(); + } + + // Disjoint sorted unclaimed ranges of expanded tokens. + std::set, decltype(&rangeLess)> UnclaimedRanges; +}; + +// Sentinel value for the selectedness of a node where we've seen no tokens yet. +// This resolves to Unselected if no tokens are ever seen. +// But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete. +// This value is never exposed publicly. +constexpr SelectionTree::Selection NoTokens = + static_cast( + static_cast(SelectionTree::Complete + 1)); + +// Nodes start with NoTokens, and then use this function to aggregate the +// selectedness as more tokens are found. +void update(SelectionTree::Selection &Result, SelectionTree::Selection New) { + if (New == NoTokens) + return; + if (Result == NoTokens) + Result = New; + else if (Result != New) + // Can only be completely selected (or unselected) if all tokens are. + Result = SelectionTree::Partial; +} + + +// SelectionTester can determine whether a range of tokens from the PP-expanded +// stream (corresponding to an AST node) is considered selected. +// +// When the tokens result from macro expansions, the appropriate tokens in the +// main file are examined (macro invocation or args). Similarly for #includes. +// +// It tests each token in the range (not just the endpoints) as contiguous +// expanded tokens may not have contiguous spellings (with macros). +// +// Non-token text, and tokens not modeled in the AST (comments, semicolons) +// are ignored when determining selectedness. +class SelectionTester { public: - SelectedTokens(llvm::ArrayRef Spelled, const SourceManager &SM, - unsigned SelBegin, unsigned SelEnd) - : SelBegin(SelBegin), SelEnd(SelEnd) { - // Extract bounds and selected-ness for all tokens spelled in the file. - Tokens.reserve(Spelled.size()); - for (const auto& Tok : Spelled) { + // The selection is offsets [SelBegin, SelEnd) in SelFile. + SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile, + unsigned SelBegin, unsigned SelEnd, const SourceManager &SM) + : SelFile(SelFile), SM(SM) { + // Find all tokens (partially) selected in the file. + auto AllSpelledTokens = Buf.spelledTokens(SelFile); + const syntax::Token *SelFirst = + llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) { + return SM.getFileOffset(Tok.endLocation()) <= SelBegin; + }); + const syntax::Token *SelLimit = std::partition_point( + SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) { + return SM.getFileOffset(Tok.location()) < SelEnd; + }); + // Precompute selectedness and offset for selected spelled tokens. + for (const syntax::Token *T = SelFirst; T < SelLimit; ++T) { // As well as comments, don't count semicolons as real tokens. // They're not properly claimed as expr-statement is missing from the AST. - if (Tok.kind() == tok::comment || Tok.kind() == tok::semi) + if (T->kind() == tok::comment || T->kind() == tok::semi) continue; - - Tokens.emplace_back(); - TokInfo &S = Tokens.back(); - S.StartOffset = SM.getFileOffset(Tok.location()); - S.EndOffset = S.StartOffset + Tok.length(); - if (S.StartOffset >= SelBegin && S.EndOffset <= SelEnd) + SpelledTokens.emplace_back(); + Tok &S = SpelledTokens.back(); + S.Offset = SM.getFileOffset(T->location()); + if (S.Offset >= SelBegin && S.Offset + T->length() <= SelEnd) S.Selected = SelectionTree::Complete; - else if (S.EndOffset > SelBegin && S.StartOffset < SelEnd) - S.Selected = SelectionTree::Partial; else - S.Selected = SelectionTree::Unselected; - S.Claimed = false; + S.Selected = SelectionTree::Partial; } } - // Associates any tokens overlapping [Begin, End) with an AST node. - // Tokens that were already claimed by another AST node are not claimed again. - // Updates Result if the node is selected in the sense of SelectionTree. - void claim(unsigned Begin, unsigned End, SelectionTree::Selection &Result) { - assert(Begin <= End); + // Test whether a consecutive range of tokens is selected. + // The tokens are taken from the expanded token stream. + SelectionTree::Selection + test(llvm::ArrayRef ExpandedTokens) const { + if (SpelledTokens.empty()) + return NoTokens; + SelectionTree::Selection Result = NoTokens; + while (!ExpandedTokens.empty()) { + // Take consecutive tokens from the same context together for efficiency. + FileID FID = SM.getFileID(ExpandedTokens.front().location()); + auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) { + return SM.getFileID(T.location()) == FID; + }); + assert(!Batch.empty()); + ExpandedTokens = ExpandedTokens.drop_front(Batch.size()); + + update(Result, testChunk(FID, Batch)); + } + return Result; + } - // Fast-path for missing the selection entirely. - if (Begin >= SelEnd || End <= SelBegin) - return; - - // We will consider the range (at least partially) selected if it hit any - // selected and previously unclaimed token. - bool ClaimedAnyToken = false; - // The selection is (at most) partial if: - // - any claimed token is partially selected - // - any token in the range is unselected - bool PartialSelection = false; - - // Find the first token that (maybe) overlaps the claimed range. - auto Start = llvm::partition_point(Tokens, [&](const TokInfo &Tok) { - return Tok.EndOffset <= Begin; - }); - // Iterate over every token that overlaps the range. - // Claim selected tokens, and update the two result flags. - for (auto It = Start; It != Tokens.end() && It->StartOffset < End; ++It) { - if (It->Selected) { - if (!It->Claimed) { - // Token is selected, in the node's range, and unclaimed; claim it. - It->Claimed = true; - ClaimedAnyToken = true; - // If the token was only partially selected, so is the node. - PartialSelection |= (It->Selected == SelectionTree::Partial); - } - } else { - // If the node covers an unselected token, it's not completely selected. - PartialSelection = true; + // Cheap check whether any of the tokens in R might be selected. + // If it returns false, test() will return NoTokens or Unselected. + // If it returns true, test() may return any value. + bool mayHit(SourceRange R) const { + if (SpelledTokens.empty()) + return false; + auto B = SM.getDecomposedLoc(R.getBegin()); + auto E = SM.getDecomposedLoc(R.getEnd()); + if (B.first == SelFile && E.first == SelFile) + if (E.second < SpelledTokens.front().Offset || + B.second > SpelledTokens.back().Offset) + return false; + return true; + } + +private: + // Hit-test a consecutive range of tokens from a single file ID. + SelectionTree::Selection + testChunk(FileID FID, llvm::ArrayRef Batch) const { + assert(!Batch.empty()); + SourceLocation StartLoc = Batch.front().location(); + // There are several possible categories of FileID depending on how the + // preprocessor was used to generate these tokens: + // main file, #included file, macro args, macro bodies. + // We need to identify the main-file tokens that represent Batch, and + // determine whether we want to exclusively claim them. Regular tokens + // represent one AST construct, but a macro invocation can represent many. + + // Handle tokens written directly in the main file. + if (FID == SelFile) { + return testTokenRange(SM.getFileOffset(Batch.front().location()), + SM.getFileOffset(Batch.back().location())); + } + + // Handle tokens in another file #included into the main file. + // Check if the #include is selected, but don't claim it exclusively. + if (StartLoc.isFileID()) { + for (SourceLocation Loc = Batch.front().location(); Loc.isValid(); + Loc = SM.getIncludeLoc(SM.getFileID(Loc))) { + if (SM.getFileID(Loc) == SelFile) + // FIXME: use whole #include directive, not just the filename string. + return testToken(SM.getFileOffset(Loc)); } + return NoTokens; } - // If some tokens were previously claimed (Result != Unselected), we may - // upgrade from Partial->Complete, even if no new tokens were claimed. - // Important for [[int a]]. - if (ClaimedAnyToken || Result) { - Result = std::max(Result, PartialSelection ? SelectionTree::Partial - : SelectionTree::Complete); + assert(StartLoc.isMacroID()); + // Handle tokens that were passed as a macro argument. + SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc); + if (SM.getFileID(ArgStart) == SelFile) { + SourceLocation ArgEnd = SM.getTopMacroCallerLoc(Batch.back().location()); + return testTokenRange(SM.getFileOffset(ArgStart), + SM.getFileOffset(ArgEnd)); } + + // Handle tokens produced by non-argument macro expansion. + // Check if the macro name is selected, don't claim it exclusively. + auto Expansion = SM.getDecomposedExpansionLoc(StartLoc); + if (Expansion.first == SelFile) + // FIXME: also check ( and ) for function-like macros? + return testToken(Expansion.second); + else + return NoTokens; } -private: - struct TokInfo { - unsigned StartOffset; - unsigned EndOffset; + // Is the closed token range [Begin, End] selected? + SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const { + assert(Begin <= End); + // Outside the selection entirely? + if (End < SpelledTokens.front().Offset || + Begin > SpelledTokens.back().Offset) + return SelectionTree::Unselected; + + // Compute range of tokens. + auto B = llvm::partition_point( + SpelledTokens, [&](const Tok &T) { return T.Offset < Begin; }); + auto E = std::partition_point( + B, SpelledTokens.end(), [&](const Tok &T) { return T.Offset <= End; }); + + // Aggregate selectedness of tokens in range. + bool ExtendsOutsideSelection = Begin < SpelledTokens.front().Offset || + End > SpelledTokens.back().Offset; + SelectionTree::Selection Result = + ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens; + for (auto It = B; It != E; ++It) + update(Result, It->Selected); + return Result; + } + + // Is the token at `Offset` selected? + SelectionTree::Selection testToken(unsigned Offset) const { + // Outside the selection entirely? + if (Offset < SpelledTokens.front().Offset || + Offset > SpelledTokens.back().Offset) + return SelectionTree::Unselected; + // Find the token, if it exists. + auto It = llvm::partition_point( + SpelledTokens, [&](const Tok &T) { return T.Offset < Offset; }); + if (It != SpelledTokens.end() && It->Offset == Offset) + return It->Selected; + return NoTokens; + } + + struct Tok { + unsigned Offset; SelectionTree::Selection Selected; - bool Claimed; - bool operator<(const TokInfo &Other) const { - return StartOffset < Other.StartOffset; - } }; - std::vector Tokens; - unsigned SelBegin, SelEnd; + std::vector SpelledTokens; + FileID SelFile; + const SourceManager &SM; }; // Show the type of a node for debugging. @@ -195,16 +383,6 @@ V.TraverseAST(AST); assert(V.Stack.size() == 1 && "Unpaired push/pop?"); assert(V.Stack.top() == &V.Nodes.front()); - // We selected TUDecl if tokens were unclaimed (or the file is empty). - SelectionTree::Selection UnclaimedTokens = SelectionTree::Unselected; - V.Claimed.claim(Begin, End, UnclaimedTokens); - if (UnclaimedTokens || V.Nodes.size() == 1) { - StringRef FileContent = AST.getSourceManager().getBufferData(File); - // Don't require the trailing newlines to be selected. - bool SelectedAll = Begin == 0 && End >= FileContent.rtrim().size(); - V.Stack.top()->Selected = - SelectedAll ? SelectionTree::Complete : SelectionTree::Partial; - } return std::move(V.Nodes); } @@ -289,11 +467,8 @@ #ifndef NDEBUG PrintPolicy(PP), #endif - Claimed(Tokens.spelledTokens(SelFile), SM, SelBegin, SelEnd), - SelFile(SelFile), - SelBeginTokenStart(SM.getFileOffset(Lexer::GetBeginningOfToken( - SM.getComposedLoc(SelFile, SelBegin), SM, LangOpts))), - SelEnd(SelEnd) { + TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM), + UnclaimedExpandedTokens(Tokens.expandedTokens()) { // Ensure we have a node for the TU decl, regardless of traversal scope. Nodes.emplace_back(); Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl()); @@ -346,18 +521,12 @@ // don't intersect the selection may be recursively skipped. bool canSafelySkipNode(const DynTypedNode &N) { SourceRange S = N.getSourceRange(); - auto B = SM.getDecomposedLoc(S.getBegin()); - auto E = SM.getDecomposedLoc(S.getEnd()); - // Node lies in a macro expansion? - if (B.first != SelFile || E.first != SelFile) - return false; - // Node intersects selection tokens? - if (B.second < SelEnd && E.second >= SelBeginTokenStart) - return false; - // Otherwise, allow skipping over the node. - dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); - dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); - return true; + if (!SelChecker.mayHit(S)) { + dlog("{1}skip: {0}", printNodeToString(N, PrintPolicy), indent()); + dlog("{1}skipped range = {0}", S.printToString(SM), indent(1)); + return true; + } + return false; } // There are certain nodes we want to treat as leaves in the SelectionTree, @@ -377,11 +546,9 @@ Nodes.emplace_back(); Nodes.back().ASTNode = std::move(Node); Nodes.back().Parent = Stack.top(); + Nodes.back().Selected = NoTokens; Stack.push(&Nodes.back()); claimRange(Early, Nodes.back().Selected); - // Early hit detection never selects the whole node. - if (Nodes.back().Selected) - Nodes.back().Selected = SelectionTree::Partial; } // Pops a node off the ancestor stack, and finalizes it. Pairs with push(). @@ -390,6 +557,8 @@ Node &N = *Stack.top(); dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1)); claimRange(N.ASTNode.getSourceRange(), N.Selected); + if (N.Selected == NoTokens) + N.Selected = SelectionTree::Unselected; if (N.Selected || !N.Children.empty()) { // Attach to the tree. N.Parent->Children.push_back(&N); @@ -424,31 +593,12 @@ // This is usually called from pop(), so we can take children into account. // The existing state of Result is relevant (early/late claims can interact). void claimRange(SourceRange S, SelectionTree::Selection &Result) { - if (!S.isValid()) - return; - // toHalfOpenFileRange() allows selection of constructs in macro args. e.g: - // #define LOOP_FOREVER(Body) for(;;) { Body } - // void IncrementLots(int &x) { - // LOOP_FOREVER( ++x; ) - // } - // Selecting "++x" or "x" will do the right thing. - auto Range = toHalfOpenFileRange(SM, LangOpts, S); - assert(Range && "We should be able to get the File Range"); - dlog("{1}claimRange: {0}", Range->printToString(SM), indent()); - auto B = SM.getDecomposedLoc(Range->getBegin()); - auto E = SM.getDecomposedLoc(Range->getEnd()); - // Otherwise, nodes in macro expansions can't be selected. - if (B.first != SelFile || E.first != SelFile) - return; - // Attempt to claim the remaining range. If there's nothing to claim, only - // children were selected. - Claimed.claim(B.second, E.second, Result); - if (Result) - dlog("{1}hit selection: {0}", - SourceRange(SM.getComposedLoc(B.first, B.second), - SM.getComposedLoc(E.first, E.second)) - .printToString(SM), - indent()); + for (const auto &ClaimedRange : + UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S))) + update(Result, SelChecker.test(ClaimedRange)); + + if (Result && Result != NoTokens) + dlog("{1}hit selection: {0}", S.printToString(SM), indent()); } std::string indent(int Offset = 0) { @@ -463,17 +613,11 @@ #ifndef NDEBUG const PrintingPolicy &PrintPolicy; #endif + const syntax::TokenBuffer &TokenBuf; std::stack Stack; - SelectedTokens Claimed; + SelectionTester SelChecker; + IntervalSet UnclaimedExpandedTokens; std::deque Nodes; // Stable pointers as we add more nodes. - FileID SelFile; - // If the selection start slices a token in half, the beginning of that token. - // This is useful for checking whether the end of a token range overlaps - // the selection: range.end < SelBeginTokenStart is equivalent to - // range.end + measureToken(range.end) < SelBegin (assuming range.end points - // to a token), and it saves a lex every time. - unsigned SelBeginTokenStart; - unsigned SelEnd; }; } // namespace diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp --- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp +++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp @@ -136,6 +136,15 @@ }, { R"cpp( + int x(int); + #define M(foo) x(foo) + int a = 42; + int b = M([[^a]]); + )cpp", + "DeclRefExpr", + }, + { + R"cpp( void foo(); #define CALL_FUNCTION(X) X() void bar() { CALL_FUNCTION([[f^o^o]]); } @@ -378,6 +387,7 @@ $C[[return]]; }]] else [[{^ }]]]] + char z; } )cpp", R"cpp( @@ -386,10 +396,10 @@ void foo(^$C[[unique_ptr<$C[[unique_ptr<$C[[int]]>]]>]]^ a) {} )cpp", R"cpp(int a = [[5 >^> 1]];)cpp", - R"cpp([[ + R"cpp( #define ECHO(X) X - ECHO(EC^HO([[$C[[int]]) EC^HO(a]])); - ]])cpp", + ECHO(EC^HO($C[[int]]) EC^HO(a)); + )cpp", R"cpp( $C[[^$C[[int]] a^]]; )cpp", R"cpp( $C[[^$C[[int]] a = $C[[5]]^]]; )cpp", }; @@ -428,6 +438,56 @@ EXPECT_EQ("WhileStmt", T.commonAncestor()->Parent->kind()); } +TEST(SelectionTest, IncludedFile) { + const char *Case = R"cpp( + void test() { +#include "Exp^and.inc" + break; + } + )cpp"; + Annotations Test(Case); + auto TU = TestTU::withCode(Test.code()); + TU.AdditionalFiles["Expand.inc"] = "while(1)\n"; + auto AST = TU.build(); + auto T = makeSelectionTree(Case, AST); + + EXPECT_EQ("WhileStmt", T.commonAncestor()->kind()); +} + +TEST(SelectionTest, MacroArgExpansion) { + // If a macro arg is expanded several times, we consider them all selected. + const char *Case = R"cpp( + int mul(int, int); + #define SQUARE(X) mul(X, X); + int nine = SQUARE(^3); + )cpp"; + Annotations Test(Case); + auto AST = TestTU::withCode(Test.code()).build(); + auto T = makeSelectionTree(Case, AST); + // Unfortunately, this makes the common ancestor the CallExpr... + // FIXME: hack around this by picking one? + EXPECT_EQ("CallExpr", T.commonAncestor()->kind()); + EXPECT_FALSE(T.commonAncestor()->Selected); + EXPECT_EQ(2u, T.commonAncestor()->Children.size()); + for (const auto* N : T.commonAncestor()->Children) { + EXPECT_EQ("IntegerLiteral", N->kind()); + EXPECT_TRUE(N->Selected); + } + + // Verify that the common assert() macro doesn't suffer from this. + // (This is because we don't associate the stringified token with the arg). + Case = R"cpp( + void die(const char*); + #define assert(x) (x ? (void)0 : die(#x) + void foo() { assert(^42); } + )cpp"; + Test = Annotations(Case); + AST = TestTU::withCode(Test.code()).build(); + T = makeSelectionTree(Case, AST); + + EXPECT_EQ("IntegerLiteral", T.commonAncestor()->kind()); +} + TEST(SelectionTest, Implicit) { const char* Test = R"cpp( struct S { S(const char*); }; diff --git a/clang-tools-extra/clangd/unittests/TweakTests.cpp b/clang-tools-extra/clangd/unittests/TweakTests.cpp --- a/clang-tools-extra/clangd/unittests/TweakTests.cpp +++ b/clang-tools-extra/clangd/unittests/TweakTests.cpp @@ -269,7 +269,7 @@ EXPECT_UNAVAILABLE(UnavailableCases); // vector of pairs of input and output strings - const std::vector> + const std::vector> InputOutputs = { // extraction from variable declaration/assignment {R"cpp(void varDecl() { @@ -321,17 +321,10 @@ if(1) LOOP(5 + [[3]]) })cpp", - /*FIXME: It should be extracted like this. SelectionTree needs to be - * fixed for macros. R"cpp(#define LOOP(x) while (1) {a = x;} - void f(int a) { - auto dummy = 3; if(1) - LOOP(5 + dummy) - })cpp"},*/ - R"cpp(#define LOOP(x) while (1) {a = x;} void f(int a) { - auto dummy = LOOP(5 + 3); if(1) - dummy + auto dummy = 3; if(1) + LOOP(5 + dummy) })cpp"}, {R"cpp(#define LOOP(x) do {x;} while(1); void f(int a) { @@ -644,13 +637,18 @@ )cpp"; EXPECT_EQ(apply(TemplateFailInput), "unavailable"); - // FIXME: This should be extractable after selectionTree works correctly for - // macros (currently it doesn't select anything for the following case) - std::string MacroFailInput = R"cpp( + std::string MacroInput = R"cpp( #define F(BODY) void f() { BODY } F ([[int x = 0;]]) )cpp"; - EXPECT_EQ(apply(MacroFailInput), "unavailable"); + std::string MacroOutput = R"cpp( + #define F(BODY) void f() { BODY } + void extracted() { +int x = 0; +} +F (extracted();) + )cpp"; + EXPECT_EQ(apply(MacroInput), MacroOutput); // Shouldn't crash. EXPECT_EQ(apply("void f([[int a]]);"), "unavailable"); diff --git a/clang/include/clang/Tooling/Syntax/Tokens.h b/clang/include/clang/Tooling/Syntax/Tokens.h --- a/clang/include/clang/Tooling/Syntax/Tokens.h +++ b/clang/include/clang/Tooling/Syntax/Tokens.h @@ -175,6 +175,7 @@ /// All tokens produced by the preprocessor after all macro replacements, /// directives, etc. Source locations found in the clang AST will always /// point to one of these tokens. + /// Tokens are in TU order (per SourceManager::isBeforeInTranslationUnit()). /// FIXME: figure out how to handle token splitting, e.g. '>>' can be split /// into two '>' tokens by the parser. However, TokenBuffer currently /// keeps it as a single '>>' token. @@ -182,6 +183,10 @@ return ExpandedTokens; } + /// Returns the subrange of expandedTokens() corresponding to the closed + /// token range R. + llvm::ArrayRef expandedTokens(SourceRange R) const; + /// Find the subrange of spelled tokens that produced the corresponding \p /// Expanded tokens. /// diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -119,6 +119,22 @@ return Text.substr(Begin, length()); } +llvm::ArrayRef TokenBuffer::expandedTokens(SourceRange R) const { + if (R.isInvalid()) + return {}; + const Token *Begin = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return SourceMgr->isBeforeInTranslationUnit(T.location(), R.getBegin()); + }); + const Token *End = + llvm::partition_point(expandedTokens(), [&](const syntax::Token &T) { + return !SourceMgr->isBeforeInTranslationUnit(R.getEnd(), T.location()); + }); + if (Begin > End) + return {}; + return {Begin, End}; +} + std::pair TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const { assert(Expanded); diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -40,6 +40,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Testing/Support/Annotations.h" #include "llvm/Testing/Support/SupportHelpers.h" +#include "gmock/gmock.h" #include #include #include @@ -663,6 +664,20 @@ ValueIs(SameRange(findSpelled("not_mapped")))); } +TEST_F(TokenBufferTest, ExpandedTokensForRange) { + recordTokens(R"cpp( + #define SIGN(X) X##_washere + A SIGN(B) C SIGN(D) E SIGN(F) G + )cpp"); + + SourceRange R(findExpanded("C").front().location(), + findExpanded("F_washere").front().location()); + // Sanity check: expanded and spelled tokens are stored separately. + EXPECT_THAT(Buffer.expandedTokens(R), + SameRange(findExpanded("C D_washere E F_washere"))); + EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty()); +} + TEST_F(TokenBufferTest, ExpansionStartingAt) { // Object-like macro expansions. recordTokens(R"cpp(