diff --git a/clang/include/clang/Tooling/Refactoring/RangeSelector.h b/clang/include/clang/Tooling/Refactoring/RangeSelector.h
new file mode 100644
--- /dev/null
+++ b/clang/include/clang/Tooling/Refactoring/RangeSelector.h
@@ -0,0 +1,82 @@
+//===--- RangeSelector.h - Source-selection library ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Defines a combinator library supporting the definition of _selectors_,
+/// which select source ranges based on (bound) AST nodes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_
+#define LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <functional>
+
+namespace clang {
+namespace tooling {
+/// A selector maps a match result to a source range, or fails with an
+/// \c llvm::Error.
+using RangeSelector = std::function<Expected<CharSourceRange>(
+    const ast_matchers::MatchFinder::MatchResult &)>;
+
+namespace range_selector {
+/// Selects exactly the range \p R, independent of the match result.
+inline RangeSelector charRange(CharSourceRange R) {
+  return [R](const ast_matchers::MatchFinder::MatchResult &)
+             -> Expected<CharSourceRange> { return R; };
+}
+
+/// \returns the range corresponding to the identified node.
+RangeSelector node(StringRef Id);
+/// Variant of \c node() that identifies the node as a statement, for purposes
+/// of deciding whether to include any trailing semicolon in the selected range.
+/// Only relevant for Expr nodes, which, by default, are *not* considered as
+/// statements.
+/// \returns the range corresponding to the identified node, considered as a
+/// statement.
+RangeSelector sNode(StringRef Id);
+
+/// Convenience version of \c range where end points are nodes.
+RangeSelector nodeRange(StringRef BeginId, StringRef EndId);
+
+/// Given a \c MemberExpr bound to \c Id, selects the member's token.
+RangeSelector member(StringRef Id);
+
+/// Given a \c NamedDecl, \c DeclRefExpr or \c CXXCtorInitializer bound to
+/// \c Id, selects the token of the relevant name, not including qualifiers.
+RangeSelector name(StringRef Id);
+
+/// Given a reference to a call expression node, yields the range of the
+/// arguments (all source between the call's parentheses).
+RangeSelector args(StringRef Id);
+
+/// Given a reference to a compound statement node, yields the range of the
+/// statements (all source between the braces).
+RangeSelector statements(StringRef Id);
+
+/// Given a reference to an initializer-list expression node, yields the range
+/// of the elements (all source between the braces).
+RangeSelector elements(StringRef Id);
+
+/// Yields the range that starts at the start of \p Begin and extends to the end
+/// of \p End.
+RangeSelector range(RangeSelector Begin, RangeSelector End);
+
+/// Yields the range from which `S` was expanded (possibly along with other
+/// source), if `S` is an expansion, and `S` itself, otherwise.  Corresponds to
+/// `SourceManager::getExpansionRange`.
+RangeSelector contraction(RangeSelector S);
+} // namespace range_selector
+} // namespace tooling
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_
diff --git a/clang/include/clang/Tooling/Refactoring/SourceCode.h b/clang/include/clang/Tooling/Refactoring/SourceCode.h
--- a/clang/include/clang/Tooling/Refactoring/SourceCode.h
+++ b/clang/include/clang/Tooling/Refactoring/SourceCode.h
@@ -72,6 +72,27 @@
                           ASTContext &Context) {
   return getText(getExtendedRange(Node, Next, Context), Context);
 }
+
+/// Returns the beginning of the token that contains the character immediately
+/// before \p Start, or an invalid location if either position is invalid or
+/// inside a macro.
+SourceLocation findPreviousTokenStart(SourceLocation Start,
+                                      const SourceManager &SM,
+                                      const LangOptions &LangOpts);
+
+/// Searches backwards from \p Start, one token at a time, for a token of kind
+/// \p TK. Returns its location, or an invalid location if the search reaches
+/// an invalid or macro location or a token cannot be lexed.
+SourceLocation findPreviousTokenKind(SourceLocation Start,
+                                     const SourceManager &SM,
+                                     const LangOptions &LangOpts,
+                                     tok::TokenKind TK);
+
+/// Returns the location of the '(' that opens the argument list of \p E. The
+/// search runs backwards from the first argument, or from the closing paren
+/// when the call has no arguments.
+SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
+                             const LangOptions &LangOpts);
 } // namespace tooling
 } // namespace clang
 #endif // LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H
diff --git a/clang/lib/Tooling/Refactoring/CMakeLists.txt b/clang/lib/Tooling/Refactoring/CMakeLists.txt
--- a/clang/lib/Tooling/Refactoring/CMakeLists.txt
+++ b/clang/lib/Tooling/Refactoring/CMakeLists.txt
@@ -6,6 +6,7 @@
   AtomicChange.cpp
   Extract/Extract.cpp
   Extract/SourceExtraction.cpp
+  RangeSelector.cpp
   RefactoringActions.cpp
   Rename/RenamingAction.cpp
   Rename/SymbolOccurrences.cpp
diff --git a/clang/lib/Tooling/Refactoring/RangeSelector.cpp b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
new file mode 100644
--- /dev/null
+++ b/clang/lib/Tooling/Refactoring/RangeSelector.cpp
@@ -0,0 +1,233 @@
+//===--- RangeSelector.cpp - RangeSelector implementation -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/RangeSelector.h"
+#include "clang/AST/Expr.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Tooling/Refactoring/SourceCode.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+using namespace tooling;
+
+using ast_matchers::MatchFinder;
+using ast_type_traits::ASTNodeKind;
+using ast_type_traits::DynTypedNode;
+using llvm::Error;
+using llvm::StringError;
+
+using MatchResult = MatchFinder::MatchResult;
+
+// Wraps \p Message in a StringError carrying errc::invalid_argument.
+static Error invalidArgumentError(Twine Message) {
+  return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
+}
+
+// Error for a bound node whose kind is not the one the selector requires.
+static Error typeError(StringRef Id, const ASTNodeKind &Kind) {
+  return invalidArgumentError("Mismatched type (node id=" + Id +
+                              " kind=" + Kind.asStringRef() + ")");
+}
+
+// As above, but also names the acceptable node kinds in \p ExpectedType.
+static Error typeError(StringRef Id, const ASTNodeKind &Kind,
+                       Twine ExpectedType) {
+  return invalidArgumentError("Expected one of " + ExpectedType +
+                              " (node id=" + Id +
+                              " kind=" + Kind.asStringRef() + ")");
+}
+
+// Error for a node of an accepted kind that lacks \p Property.
+static Error missingPropertyError(StringRef Id, Twine Description,
+                                  StringRef Property) {
+  return invalidArgumentError(Description + " requires property '" + Property +
+                              "' (node id=" + Id + ")");
+}
+
+// Looks up the node bound to \p Id; fails if no such binding exists.
+static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
+                                      StringRef Id) {
+  auto &NodesMap = Nodes.getMap();
+  auto It = NodesMap.find(Id);
+  if (It == NodesMap.end())
+    return invalidArgumentError("Id not bound: " + Id);
+  return It->second;
+}
+
+RangeSelector range_selector::node(StringRef Id) {
+  std::string Key = Id.str(); // Owned: the selector can outlive \p Id.
+  return [Key](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> Node = getNode(Result.Nodes, Key);
+    if (!Node)
+      return Node.takeError();
+    // Non-expression statements are extended over a trailing semicolon.
+    return Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr
+               ? getExtendedRange(*Node, tok::TokenKind::semi, *Result.Context)
+               : CharSourceRange::getTokenRange(Node->getSourceRange());
+  };
+}
+
+RangeSelector range_selector::sNode(StringRef Id) {
+  std::string Key = Id.str(); // Owned: the selector can outlive \p Id.
+  return [Key](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> Node = getNode(Result.Nodes, Key);
+    if (!Node)
+      return Node.takeError();
+    return getExtendedRange(*Node, tok::TokenKind::semi, *Result.Context);
+  };
+}
+
+RangeSelector range_selector::range(RangeSelector Begin, RangeSelector End) {
+  return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> BeginRange = Begin(Result);
+    if (!BeginRange)
+      return BeginRange.takeError();
+    Expected<CharSourceRange> EndRange = End(Result);
+    if (!EndRange)
+      return EndRange.takeError();
+    SourceLocation B = BeginRange->getBegin();
+    SourceLocation E = EndRange->getEnd();
+    // Note: we are precluding the possibility of sub-token ranges in the case
+    // that EndRange is a token range.
+    if (Result.SourceManager->isBeforeInTranslationUnit(E, B))
+      return invalidArgumentError("Bad range: out of order");
+    return CharSourceRange(SourceRange(B, E), EndRange->isTokenRange());
+  };
+}
+
+RangeSelector range_selector::nodeRange(StringRef BeginId, StringRef EndId) {
+  return range_selector::range(node(BeginId), node(EndId));
+}
+
+RangeSelector range_selector::member(StringRef Id) {
+  std::string Key = Id.str(); // Owned: the selector can outlive \p Id.
+  return [Key](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> Node = getNode(Result.Nodes, Key);
+    if (!Node)
+      return Node.takeError();
+    if (auto *M = Node->get<MemberExpr>())
+      return CharSourceRange::getTokenRange(
+          M->getMemberNameInfo().getSourceRange());
+    return typeError(Key, Node->getNodeKind(), "MemberExpr");
+  };
+}
+
+RangeSelector range_selector::name(StringRef Id) {
+  std::string Key = Id.str(); // Owned: the selector can outlive \p Id.
+  return [Key](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> N = getNode(Result.Nodes, Key);
+    if (!N)
+      return N.takeError();
+    auto &Node = *N;
+    if (const auto *D = Node.get<NamedDecl>()) {
+      if (!D->getDeclName().isIdentifier())
+        return missingPropertyError(Key, "name", "identifier");
+      SourceLocation L = D->getLocation();
+      auto R = CharSourceRange::getTokenRange(L, L);
+      // Verify that the range covers exactly the name.
+      // FIXME: extend this code to support cases like `operator +` or
+      // `foo<int>` for which this range will be too short.  Doing so will
+      // require case analysis over the subclasses of `NamedDecl`, because it
+      // doesn't provide virtual access to the \c DeclarationNameInfo.
+      if (getText(R, *Result.Context) != D->getName())
+        return CharSourceRange();
+      return R;
+    }
+    if (const auto *E = Node.get<DeclRefExpr>()) {
+      if (!E->getNameInfo().getName().isIdentifier())
+        return missingPropertyError(Key, "name", "identifier");
+      SourceLocation L = E->getLocation();
+      return CharSourceRange::getTokenRange(L, L);
+    }
+    if (const auto *I = Node.get<CXXCtorInitializer>()) {
+      // The error names an "explicit member initializer", so reject anything
+      // that is not a member initializer or is not written in the source.
+      if (!I->isMemberInitializer() || !I->isWritten())
+        return missingPropertyError(Key, "name",
+                                    "explicit member initializer");
+      SourceLocation L = I->getMemberLocation();
+      return CharSourceRange::getTokenRange(L, L);
+    }
+    return typeError(Key, Node.getNodeKind(),
+                     "DeclRefExpr, NamedDecl, CXXCtorInitializer");
+  };
+}
+
+namespace {
+// Creates a selector from a range-selection function `Func`, which selects a
+// range that is relative to a bound node id.  `ArgT` is the node type expected
+// by `Func`.
+template <typename ArgT,
+          CharSourceRange (*Func)(const MatchResult &, const ArgT &)>
+class RelativeSelector {
+  std::string Id;
+
+public:
+  explicit RelativeSelector(StringRef Id) : Id(Id) {}
+
+  Expected<CharSourceRange> operator()(const MatchResult &Result) {
+    Expected<DynTypedNode> N = getNode(Result.Nodes, Id);
+    if (!N)
+      return N.takeError();
+    if (const auto *Arg = N->get<ArgT>())
+      return Func(Result, *Arg);
+    return typeError(Id, N->getNodeKind());
+  }
+};
+} // namespace
+
+// Returns the range of the statements (all source between the braces).
+static CharSourceRange getStatementsRange(const MatchResult &,
+                                          const CompoundStmt &CS) {
+  return CharSourceRange::getCharRange(CS.getLBracLoc().getLocWithOffset(1),
+                                       CS.getRBracLoc());
+}
+
+RangeSelector range_selector::statements(StringRef Id) {
+  return RelativeSelector<CompoundStmt, getStatementsRange>(Id);
+}
+
+// Returns the range of the source between the call's parentheses.
+static CharSourceRange getArgumentsRange(const MatchResult &Result,
+                                         const CallExpr &CE) {
+  return CharSourceRange::getCharRange(
+      findOpenParen(CE, *Result.SourceManager, Result.Context->getLangOpts())
+          .getLocWithOffset(1),
+      CE.getRParenLoc());
+}
+
+RangeSelector range_selector::args(StringRef Id) {
+  return RelativeSelector<CallExpr, getArgumentsRange>(Id);
+}
+
+// Returns the range of the initializer list's elements (between the braces).
+static CharSourceRange getElementsRange(const MatchResult &,
+                                        const InitListExpr &E) {
+  return CharSourceRange::getCharRange(E.getLBraceLoc().getLocWithOffset(1),
+                                       E.getRBraceLoc());
+}
+
+RangeSelector range_selector::elements(StringRef Id) {
+  return RelativeSelector<InitListExpr, getElementsRange>(Id);
+}
+
+// Qualified so that this defines the selector declared in RangeSelector.h.
+RangeSelector range_selector::contraction(RangeSelector S) {
+  return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> SRange = S(Result);
+    if (!SRange)
+      return SRange.takeError();
+    return Result.SourceManager->getExpansionRange(*SRange);
+  };
+}
diff --git a/clang/lib/Tooling/Refactoring/SourceCode.cpp b/clang/lib/Tooling/Refactoring/SourceCode.cpp
--- a/clang/lib/Tooling/Refactoring/SourceCode.cpp
+++ b/clang/lib/Tooling/Refactoring/SourceCode.cpp
@@ -14,18 +14,64 @@
 
 using namespace clang;
 
-StringRef clang::tooling::getText(CharSourceRange Range,
-                                  const ASTContext &Context) {
+StringRef tooling::getText(CharSourceRange Range, const ASTContext &Context) {
   return Lexer::getSourceText(Range, Context.getSourceManager(),
                               Context.getLangOpts());
 }
 
-CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
-                                                 tok::TokenKind Next,
-                                                 ASTContext &Context) {
+CharSourceRange tooling::maybeExtendRange(CharSourceRange Range,
+                                          tok::TokenKind Next,
+                                          ASTContext &Context) {
   Optional<Token> Tok = Lexer::findNextToken(
       Range.getEnd(), Context.getSourceManager(), Context.getLangOpts());
   if (!Tok || !Tok->is(Next))
     return Range;
   return CharSourceRange::getTokenRange(Range.getBegin(), Tok->getLocation());
 }
+
+// Returns the beginning of the token containing the character just before
+// \p Start; invalid if either position is invalid or inside a macro.
+SourceLocation tooling::findPreviousTokenStart(SourceLocation Start,
+                                               const SourceManager &SM,
+                                               const LangOptions &LangOpts) {
+  if (Start.isInvalid() || Start.isMacroID())
+    return SourceLocation();
+
+  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
+  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
+    return SourceLocation();
+
+  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
+}
+
+// Walks backwards token by token from \p Start until a token of kind \p TK is
+// found; gives up (invalid location) on a macro or a token that fails to lex.
+SourceLocation tooling::findPreviousTokenKind(SourceLocation Start,
+                                              const SourceManager &SM,
+                                              const LangOptions &LangOpts,
+                                              tok::TokenKind TK) {
+  while (true) {
+    SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
+    if (L.isInvalid() || L.isMacroID())
+      return SourceLocation();
+
+    Token T;
+    if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
+      return SourceLocation();
+
+    if (T.is(TK))
+      return T.getLocation();
+
+    Start = L;
+  }
+}
+
+// Finds the '(' of the call by searching backwards from the first argument,
+// or from the ')' when there are no arguments.
+SourceLocation tooling::findOpenParen(const CallExpr &E,
+                                      const SourceManager &SM,
+                                      const LangOptions &LangOpts) {
+  SourceLocation EndLoc =
+      E.getNumArgs() == 0 ? E.getRParenLoc() : E.getArg(0)->getBeginLoc();
+  return findPreviousTokenKind(EndLoc, SM, LangOpts, tok::TokenKind::l_paren);
+}