Index: clangd/Selection.h =================================================================== --- clangd/Selection.h +++ clangd/Selection.h @@ -89,6 +89,15 @@ Node *Parent; // Direct children within the selection tree. llvm::SmallVector Children; + // Code ranges covered by this node. Usually {ASTNode.getSourceRange()}. + // Some nodes have "split" ranges, e.g. + // void SomeFunction(int); + // ~~~~ ~~~~~ <== FunctionTypeLoc is split. + // Or the range is narrrower than ASTNode.getSourceRange() + // return Foo(42); + // ~~~~ <== CXXConstructExpr excludes 'Foo'. + // Ranges are sorted and valid. + llvm::SmallVector Range; // The corresponding node from the full AST. ast_type_traits::DynTypedNode ASTNode; // The extent to which this node is covered by the selection. Index: clangd/Selection.cpp =================================================================== --- clangd/Selection.cpp +++ clangd/Selection.cpp @@ -8,13 +8,57 @@ #include "Selection.h" #include "ClangdUnit.h" +#include "clang/AST/ASTTypeTraits.h" +#include "clang/AST/PrettyPrinter.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/TypeLoc.h" +#include "llvm/ADT/STLExtras.h" +#include namespace clang { namespace clangd { namespace { using Node = SelectionTree::Node; using ast_type_traits::DynTypedNode; +using OffsetRange = std::pair; + +// Stores a collection of (possibly-overlapping) OffsetRanges. +// When new ranges are added, hit-tests them against existing ones. +class RangeSet { +public: + // Returns true if any of NewRanges are new to the set. + bool add(SmallVector NewRanges) { + assert(std::is_sorted(Ranges.begin(), Ranges.end())); + assert(std::is_sorted(NewRanges.begin(), NewRanges.end())); + if (NewRanges.empty()) + return false; + + // Check if each new range is covered by existing ranges. + // We can do this in one pass, as both lists are sorted. + auto OldIt = Ranges.begin(), OldEnd = Ranges.end(); + auto IsCovered = [&](const OffsetRange &New) -> bool { + unsigned Pos = New.first; + // Loop until this range is covered or we run out of children. + for (; Pos < New.second && OldIt != OldEnd; ++OldIt) { + if (OldIt->first > Pos) + return false; // [Pos, ChildIt->first) is not covered. + if (Pos < OldIt->second) + Pos = OldIt->second; // Erase *ChildIt from MyRange. + } + return Pos >= New.second; + }; + NewRanges.erase(llvm::remove_if(NewRanges, IsCovered), NewRanges.end()); + if (NewRanges.empty()) + return false; + + Ranges.insert(Ranges.end(), NewRanges.begin(), NewRanges.end()); + llvm::sort(Ranges); // should we merge, too? + return true; + } + +private: + std::vector Ranges; // Always sorted. +}; // We find the selection by visiting written nodes in the AST, looking for nodes // that intersect with the selected character range. @@ -51,7 +95,7 @@ // - those that can't be stored in DynTypedNode. // We're missing some interesting things like Attr due to the latter. bool TraverseDecl(Decl *X) { - if (X && isa(X)) + if (X && llvm::isa(X)) return Base::TraverseDecl(X); // Already pushed by constructor. // Base::TraverseDecl will suppress children, but not this node itself. if (X && X->isImplicit()) @@ -71,9 +115,12 @@ } // Stmt is the same, but this form allows the data recursion optimization. bool dataTraverseStmtPre(Stmt *X) { - if (!X || canSafelySkipNode(X->getSourceRange())) + if (X == nullptr) return false; - push(DynTypedNode::create(*X)); + auto Bounds = computeRanges(X); + if (canSafelySkipNode(Bounds)) + return false; + push(DynTypedNode::create(*X), std::move(Bounds)); return true; } bool dataTraverseStmtPost(Stmt *X) { @@ -86,6 +133,9 @@ private: using Base = RecursiveASTVisitor; + using Ranges = SmallVector; + using OffsetRange = std::pair; + SelectionVisitor(ASTContext &AST, unsigned SelBegin, unsigned SelEnd, FileID SelFile) : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()), @@ -104,30 +154,96 @@ // Node is always a pointer so the generic code can handle any null checks. template bool traverseNode(T *Node, const Func &Body) { - if (Node == nullptr || canSafelySkipNode(Node->getSourceRange())) + if (Node == nullptr) + return true; + auto Bounds = computeRanges(Node); + if (canSafelySkipNode(Bounds)) return true; - push(DynTypedNode::create(*Node)); + push(DynTypedNode::create(*Node), std::move(Bounds)); bool Ret = Body(); pop(); return Ret; } + // HIT TESTING + // + // We do rough hit testing on the way down the tree to avoid traversing + // subtrees that don't touch the selection (canSafelySkipNode), but + // fine-grained hit-testing is done on the way back up (in pop()). + // This means children get to claim parts of the selection first, and parents + // are only selected if they own tokens that no child owned. + // + // Nodes *usually* nest nicely: a child's getSourceRange() lies within the + // parent's, and a node (transitively) owns all tokens in its range. + // + // Exception 1: child range claims tokens that should be owned by the parent. + // e.g. in `void foo(int);`, the FunctionTypeLoc should own + // `void (int)` but the parent FunctionDecl should own `foo`. + // To handle this case, computeRanges() special cases these nodes, and can + // return a set of discontiguous ranges. + // + // Exception 2: siblings both claim the same node. + // e.g. `int x, y;` produces two sibling VarDecls. + // ~~~~~ x + // ~~~~~~~~ y + // Here the first ("leftmost") sibling claims the tokens it wants, and the + // other sibling gets what's left. So selecting "int" only includes the left + // VarDecl in the selection tree. + + template Ranges computeRanges(T *Node) const { + SourceRange Result = Node->getSourceRange(); + if (Result.isInvalid()) + return {}; + return {Result}; + } + + Ranges computeRanges(TypeLoc *Node) const { + // In `void foo()`, FunctionTypeLoc should claim `void ()`, not `foo`. + if (auto FTL = Node->getAs()) { + TypeLoc Return = FTL.getReturnLoc(); + Ranges R = computeRanges(&Return); + R.push_back(FTL.getLocalSourceRange()); + return R; + } + return computeRanges(Node); + } + + Ranges computeRanges(Stmt *Node) const { + // CXXConstructExpression's getSourceRange is pretty bizarre. It sometimes + // wants to capture the type, and sometimes the variable name. + // And it's often a sibling to the TypeLoc or child of a VarDecl, so this + // isn't what we want. + // We consider it to include the arg list only (including parens). + if (auto *CCE = dyn_cast(Node)) { + if (CCE->getParenOrBraceRange().isValid()) + return {CCE->getParenOrBraceRange()}; + else if (CCE->getNumArgs() == 1) + return computeRanges(CCE->getArg(0)); + else + return {}; + } + return computeRanges(Node); + } + // An optimization for a common case: nodes outside macro expansions that // don't intersect the selection may be recursively skipped. - bool canSafelySkipNode(SourceRange S) { - auto B = SM.getDecomposedLoc(S.getBegin()); - auto E = SM.getDecomposedLoc(S.getEnd()); + bool canSafelySkipNode(const SmallVector &Range) { + if (Range.empty()) + return true; + auto B = SM.getDecomposedLoc(Range.front().getBegin()); + auto E = SM.getDecomposedLoc(Range.back().getEnd()); if (B.first != SelFile || E.first != SelFile) return false; return B.second >= SelEnd || E.second < SelBeginTokenStart; } // Pushes a node onto the ancestor stack. Pairs with pop(). - void push(DynTypedNode Node) { + void push(DynTypedNode Node, SmallVector Range) { Nodes.emplace_back(); Nodes.back().ASTNode = std::move(Node); Nodes.back().Parent = Stack.top(); Nodes.back().Selected = SelectionTree::Unselected; + Nodes.back().Range = std::move(Range); Stack.push(&Nodes.back()); } @@ -150,80 +266,62 @@ // This runs for every node in the AST, and must be fast in common cases. // This is called from pop(), so we can take children into account. SelectionTree::Selection computeSelection(const Node &N) { - SourceRange S = N.ASTNode.getSourceRange(); - if (!S.isValid()) + if (N.Range.empty()) return SelectionTree::Unselected; + SmallVector Ranges; // getTopMacroCallerLoc() allows selection of constructs in macro args. e.g: // #define LOOP_FOREVER(Body) for(;;) { Body } // void IncrementLots(int &x) { // LOOP_FOREVER( ++x; ) // } // Selecting "++x" or "x" will do the right thing. - auto B = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getBegin())); - auto E = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getEnd())); - // Otherwise, nodes in macro expansions can't be selected. - if (B.first != SelFile || E.first != SelFile) - return SelectionTree::Unselected; + for (const SourceRange &S : N.Range) { + auto B = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getBegin())); + auto E = SM.getDecomposedLoc(SM.getTopMacroCallerLoc(S.getEnd())); + // Otherwise, nodes in macro expansions can't be selected. + if (B.first != SelFile || E.first != SelFile) + continue; + Ranges.push_back({B.second, E.second}); + } // Cheap test: is there any overlap at all between the selection and range? // Note that E.second is the *start* of the last token, which is why we // compare against the "rounded-down" SelBegin. - if (B.second >= SelEnd || E.second < SelBeginTokenStart) + if (Ranges.empty() || Ranges.front().first >= SelEnd || + Ranges.back().second < SelBeginTokenStart) return SelectionTree::Unselected; - // We hit something, need some more precise checks. + // We may have hit something, need some more precise checks. // Adjust [B, E) to be a half-open character range. - E.second += Lexer::MeasureTokenLength(S.getEnd(), SM, LangOpts); + for (auto &Range : Ranges) + Range.second += Lexer::MeasureTokenLength( + SM.getComposedLoc(SelFile, Range.second), SM, LangOpts); + auto PreciseBounds = + std::make_pair(Ranges.front().first, Ranges.back().second); + // Trim each range using the selection, drop ranges that are empty. + Ranges.erase(llvm::remove_if(Ranges, + [&](OffsetRange &Range) { + Range.first = + std::max(Range.first, SelBegin); + Range.second = + std::min(Range.second, SelEnd); + return Range.second <= Range.first; + }), + Ranges.end()); // This node's own selected text is (this range ^ selection) - child ranges. // If that's empty, then we've only collided with children. - if (nodesCoverRange(N.Children, std::max(SelBegin, B.second), - std::min(SelEnd, E.second))) + if (!Claimed.add(std::move(Ranges))) return SelectionTree::Unselected; // Hit children only. // Some of our own characters are covered, this is a true hit. - return (B.second >= SelBegin && E.second <= SelEnd) + // (Ranges is nonempty, otherwise nodesCoverRanges was true). + return (PreciseBounds.first >= SelBegin && PreciseBounds.second <= SelEnd) ? SelectionTree::Complete : SelectionTree::Partial; } - // Is the range [Begin, End) entirely covered by the union of the Nodes? - // (The range is a parent node's extent, and the covering nodes are children). - bool nodesCoverRange(llvm::ArrayRef Nodes, unsigned Begin, - unsigned End) { - if (Begin >= End) - return true; - if (Nodes.empty()) - return false; - - // Collect all the expansion ranges, as offsets. - SmallVector, 8> ChildRanges; - for (const Node *N : Nodes) { - CharSourceRange R = SM.getExpansionRange(N->ASTNode.getSourceRange()); - auto B = SM.getDecomposedLoc(R.getBegin()); - auto E = SM.getDecomposedLoc(R.getEnd()); - if (B.first != SelFile || E.first != SelFile) - continue; - // Try to cover up to the next token, spaces between children don't count. - if (auto Tok = Lexer::findNextToken(R.getEnd(), SM, LangOpts)) - E.second = SM.getFileOffset(Tok->getLocation()); - else if (R.isTokenRange()) - E.second += Lexer::MeasureTokenLength(R.getEnd(), SM, LangOpts); - ChildRanges.push_back({B.second, E.second}); - } - llvm::sort(ChildRanges); - - // Scan through the child ranges, removing as we go. - for (const auto R : ChildRanges) { - if (R.first > Begin) - return false; // [Begin, R.first) is not covered. - Begin = R.second; // Eliminate [R.first, R.second). - if (Begin >= End) - return true; // Remaining range is empty. - } - return false; // Went through all children, trailing characters remain. - } - SourceManager &SM; const LangOptions &LangOpts; std::stack Stack; + RangeSet Claimed; std::deque Nodes; // Stable pointers as we add more nodes. // Half-open selection range. unsigned SelBegin; Index: clangd/unittests/SelectionTests.cpp =================================================================== --- clangd/unittests/SelectionTests.cpp +++ clangd/unittests/SelectionTests.cpp @@ -144,9 +144,9 @@ R"cpp( void foo(); #define CALL_FUNCTION(X) X() - void bar() { CALL_FUNC^TION([[fo^o]]); } + void bar() [[{ CALL_FUNC^TION(fo^o); }]] )cpp", - "DeclRefExpr", + "CompoundStmt", }, { R"cpp( @@ -172,7 +172,20 @@ {"void foo() { [[foo^()]]; }", "CallExpr"}, {"void foo() { [[foo^]] (); }", "DeclRefExpr"}, {"int bar; void foo() [[{ foo (); }]]^", "CompoundStmt"}, + + // Tricky case: FunctionTypeLoc in FunctionDecl has a hole in it. {"[[^void]] foo();", "TypeLoc"}, + {"[[void foo^()]];", "TypeLoc"}, + {"[[^void foo^()]];", "FunctionDecl"}, + {"[[void ^foo()]];", "FunctionDecl"}, + // Tricky case: two VarDecls share a specifier. + {"[[int ^a]], b;", "VarDecl"}, + {"[[int a, ^b]];", "VarDecl"}, + // Tricky case: anonymous struct is a sibling of the VarDecl. + {"[[st^ruct {int x;}]] y;", "CXXRecordDecl"}, + {"[[struct {int x;} ^y]];", "VarDecl"}, + {"struct {[[int ^x]];} y;", "FieldDecl"}, + {"^", nullptr}, {"void foo() { [[foo^^]] (); }", "DeclRefExpr"},