diff --git a/clang/include/clang/Tooling/Transformer/Parsing.h b/clang/include/clang/Tooling/Transformer/Parsing.h new file mode 100644 --- /dev/null +++ b/clang/include/clang/Tooling/Transformer/Parsing.h @@ -0,0 +1,39 @@ +//===--- Parsing.h - Parsing library for Transformer ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// Defines parsing functions for Transformer types. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ +#define LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ + +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "llvm/Support/Error.h" +#include +#include + +namespace clang { +namespace transformer { + +/// Parses a string-representation of a \c RangeSelector. The grammar of these +/// strings is closely based on the (sub)grammar of \c RangeSelectors as they'd +/// appear in C++ code. However, where the C++ code takes a string (identifier), +/// this language takes an identifier. So, for example, the C++ selector +/// `node("id")` is written simply as `node(id)`. Additionally, the \c charRange +/// combinator is not supported, because there is no representation of values of +/// type \c CharSourceRange in this (little) language. 
+llvm::Expected parseRangeSelector(llvm::StringRef Input); + +} // namespace transformer +} // namespace clang + +#endif // LLVM_CLANG_TOOLING_REFACTOR_PARSING_H_ diff --git a/clang/lib/Tooling/Transformer/Parsing.cpp b/clang/lib/Tooling/Transformer/Parsing.cpp new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Transformer/Parsing.cpp @@ -0,0 +1,248 @@ +//===--- Parsing.cpp - Parsing function implementations ------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Transformer/Parsing.h" +#include "clang/AST/Expr.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/SourceLocation.h" +#include "clang/Lex/Lexer.h" +#include "clang/Tooling/Transformer/RangeSelector.h" +#include "clang/Tooling/Transformer/SourceCode.h" +#include "llvm/ADT/None.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Errc.h" +#include "llvm/Support/Error.h" +#include +#include +#include + +using namespace clang; +using namespace transformer; + +namespace { +using llvm::Error; +using llvm::Expected; + +template using RangeSelectorOp = RangeSelector (*)(Ts...); + +struct ParseState { + // The remaining input to be processed. + StringRef Input; + // The original input. Not modified during parsing; only for reference in + // error reporting. + StringRef OriginalInput; +}; + +// Represents an intermediate result returned by a parsing function. Functions +// that don't generate values should use `llvm::None` as the value. +template struct ParseProgress { + ParseState State; + // Intermediate result generated by the parser. 
+ ResultType Value; +}; + +template using ExpectedProgress = llvm::Expected>; +template using ParseFunction = ExpectedProgress (*)(ParseState); + +class ParseError : public llvm::ErrorInfo { +public: + // Required field for all ErrorInfo derivatives. + static char ID; + + ParseError(size_t Pos, std::string ErrorMsg) + : Pos(Pos), ErrorMsg(std::move(ErrorMsg)) {} + + void log(llvm::raw_ostream &OS) const override { + OS << "parse error at position (" << Pos << "): " << ErrorMsg; + } + + std::error_code convertToErrorCode() const override { + return llvm::inconvertibleErrorCode(); + } + + // Position of the error, as an offset from the start of the original input. + size_t Pos; + std::string ErrorMsg; +}; + +char ParseError::ID; +} // namespace + +static const llvm::StringMap> & +getUnaryStringSelectors() { + static const llvm::StringMap> M = { + {"name", name}, + {"node", node}, + {"statement", statement}, + {"statements", statements}, + {"member", member}, + {"callArgs", callArgs}, + {"elseBranch", elseBranch}, + {"initListElements", initListElements}}; + return M; +} +static const llvm::StringMap> & +getUnaryRangeSelectors() { + static const llvm::StringMap> M = { + {"before", before}, {"after", after}, {"expansion", expansion}}; + return M; +} + +static const llvm::StringMap> & +getBinaryStringSelectors() { + static const llvm::StringMap> M = { + {"encloseNodes", range}}; + return M; +} + +static const llvm::StringMap> & +getBinaryRangeSelectors() { + static const llvm::StringMap> + M = {{"enclose", range}}; + return M; +} + +template +llvm::Optional findOptional(const llvm::StringMap &Map, + llvm::StringRef Key) { + auto it = Map.find(Key); + if (it == Map.end()) + return llvm::None; + return it->second; +} + +template +ParseProgress makeParseProgress(ParseState State, + ResultType Result) { + return ParseProgress{State, std::move(Result)}; +} + +static llvm::Error makeParseError(const ParseState &S, llvm::Twine ErrorMsg) { + return llvm::make_error(S.OriginalInput.size() - S.Input.size(), 
+ ErrorMsg.str()); +} + +// Returns a new ParseState that advances \c S by \c N characters. +static ParseState advance(ParseState S, size_t N) { + S.Input = S.Input.drop_front(N); + return S; +} + +static StringRef consumeWhitespace(StringRef S) { + return S.drop_while([](char c) { return c >= 0 && isWhitespace(c); }); +} + +// Parses a single expected character \c c from \c State, skipping preceding +// whitespace. Returns an error if the expected character isn't found. +static ExpectedProgress parseChar(char c, ParseState State) { + State.Input = consumeWhitespace(State.Input); + if (State.Input.empty() || State.Input.front() != c) + return makeParseError(State, "expected char not found: " + llvm::Twine(c)); + return makeParseProgress(advance(State, 1), llvm::None); +} + +// Parses an identifier "token" -- handles preceding whitespace. +static ExpectedProgress parseId(ParseState State) { + State.Input = consumeWhitespace(State.Input); + auto Id = State.Input.take_while( + [](char c) { return c >= 0 && isIdentifierBody(c); }); + if (Id.empty()) + return makeParseError(State, "failed to parse name"); + return makeParseProgress(advance(State, Id.size()), Id.str()); +} + +// Parses a single element surrounded by parens. `Op` is applied to the parsed +// result to create the result of this function call. +template +ExpectedProgress parseSingle(ParseFunction ParseElement, + RangeSelectorOp Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto E = ParseElement(P->State); + if (!E) + return E.takeError(); + + P = parseChar(')', E->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, Op(std::move(E->Value))); +} + +// Parses a pair of elements surrounded by parens and separated by comma. `Op` +// is applied to the parsed results to create the result of this function call. 
+template +ExpectedProgress parsePair(ParseFunction ParseElement, + RangeSelectorOp Op, + ParseState State) { + auto P = parseChar('(', State); + if (!P) + return P.takeError(); + + auto Left = ParseElement(P->State); + if (!Left) + return Left.takeError(); + + P = parseChar(',', Left->State); + if (!P) + return P.takeError(); + + auto Right = ParseElement(P->State); + if (!Right) + return Right.takeError(); + + P = parseChar(')', Right->State); + if (!P) + return P.takeError(); + + return makeParseProgress(P->State, + Op(std::move(Left->Value), std::move(Right->Value))); +} + +// Parses a range selector: a selector name followed by its parenthesized +// argument(s) -- ids or nested selectors, depending on the name. Returns the +// selector on success and an error if the name is unknown or parsing fails. +static ExpectedProgress +parseRangeSelectorImpl(ParseState State) { + auto Id = parseId(State); + if (!Id) + return Id.takeError(); + + std::string OpName = std::move(Id->Value); + if (auto Op = findOptional(getUnaryStringSelectors(), OpName)) + return parseSingle(parseId, *Op, Id->State); + + if (auto Op = findOptional(getUnaryRangeSelectors(), OpName)) + return parseSingle(parseRangeSelectorImpl, *Op, Id->State); + + if (auto Op = findOptional(getBinaryStringSelectors(), OpName)) + return parsePair(parseId, *Op, Id->State); + + if (auto Op = findOptional(getBinaryRangeSelectors(), OpName)) + return parsePair(parseRangeSelectorImpl, *Op, Id->State); + + return makeParseError(State, "unknown selector name: " + OpName); +} + +Expected transformer::parseRangeSelector(llvm::StringRef Input) { + ParseState State = {Input, Input}; + ExpectedProgress Result = parseRangeSelectorImpl(State); + if (!Result) + return Result.takeError(); + State = Result->State; + // Discard any potentially trailing whitespace. 
+ State.Input = consumeWhitespace(State.Input); + if (State.Input.empty()) + return Result->Value; + return makeParseError(State, "unexpected input after selector"); +} diff --git a/clang/unittests/Tooling/RangeSelectorTest.cpp b/clang/unittests/Tooling/RangeSelectorTest.cpp --- a/clang/unittests/Tooling/RangeSelectorTest.cpp +++ b/clang/unittests/Tooling/RangeSelectorTest.cpp @@ -10,6 +10,7 @@ #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Frontend/ASTUnit.h" #include "clang/Tooling/Tooling.h" +#include "clang/Tooling/Transformer/Parsing.h" #include "clang/Tooling/Transformer/SourceCode.h" #include "llvm/Support/Error.h" #include "llvm/Testing/Support/Error.h" @@ -132,13 +133,36 @@ int f(int x, int y, int z) { return 3; } int g() { return f(/* comment */ 3, 7 /* comment */, 9); } )cc"; - const char *Call = "call"; - TestMatch Match = matchCode(Code, callExpr().bind(Call)); - const auto* E = Match.Result.Nodes.getNodeAs(Call); + StringRef CallID = "call"; + ast_matchers::internal::Matcher M = callExpr().bind(CallID); + RangeSelector R = before(node(CallID.str())); + + TestMatch Match = matchCode(Code, M); + const auto *E = Match.Result.Nodes.getNodeAs(CallID); assert(E != nullptr); auto ExprBegin = E->getSourceRange().getBegin(); EXPECT_THAT_EXPECTED( - before(node(Call))(Match.Result), + R(Match.Result), + HasValue(EqualsCharSourceRange( + CharSourceRange::getCharRange(ExprBegin, ExprBegin)))); +} + +TEST(RangeSelectorTest, BeforeOpParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + StringRef CallID = "call"; + ast_matchers::internal::Matcher M = callExpr().bind(CallID); + auto R = parseRangeSelector("before(node(call))"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + + TestMatch Match = matchCode(Code, M); + const auto *E = Match.Result.Nodes.getNodeAs(CallID); + assert(E != nullptr); + auto ExprBegin = E->getSourceRange().getBegin(); + 
+ EXPECT_THAT_EXPECTED( + (*R)(Match.Result), HasValue(EqualsCharSourceRange( CharSourceRange::getCharRange(ExprBegin, ExprBegin)))); } @@ -169,45 +193,82 @@ HasValue(EqualsCharSourceRange(ExpectedAfter))); } -TEST(RangeSelectorTest, RangeOp) { +// Node-id specific version. +TEST(RangeSelectorTest, RangeOpNodes) { StringRef Code = R"cc( int f(int x, int y, int z) { return 3; } int g() { return f(/* comment */ 3, 7 /* comment */, 9); } )cc"; - const char *Arg0 = "a0"; - const char *Arg1 = "a1"; - StringRef Call = "call"; - auto Matcher = callExpr(hasArgument(0, expr().bind(Arg0)), - hasArgument(1, expr().bind(Arg1))) - .bind(Call); + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + RangeSelector R = range("a0", "a1"); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7")); +} + +TEST(RangeSelectorTest, RangeOpGeneral) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + RangeSelector R = range(node("a0"), node("a1")); TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3, 7")); +} - // Node-id specific version: - EXPECT_THAT_EXPECTED(select(range(Arg0, Arg1), Match), HasValue("3, 7")); - // General version: - EXPECT_THAT_EXPECTED(select(range(node(Arg0), node(Arg1)), Match), - HasValue("3, 7")); +TEST(RangeSelectorTest, RangeOpNodesParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + auto R = parseRangeSelector("encloseNodes(a0, a1)"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + TestMatch Match = matchCode(Code, Matcher); + 
+ EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); +} + +TEST(RangeSelectorTest, RangeOpGeneralParsed) { + StringRef Code = R"cc( + int f(int x, int y, int z) { return 3; } + int g() { return f(/* comment */ 3, 7 /* comment */, 9); } + )cc"; + auto Matcher = callExpr(hasArgument(0, expr().bind("a0")), + hasArgument(1, expr().bind("a1"))); + auto R = parseRangeSelector("enclose(node(a0), node(a1))"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + TestMatch Match = matchCode(Code, Matcher); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3, 7")); } TEST(RangeSelectorTest, NodeOpStatement) { StringRef Code = "int f() { return 3; }"; - const char *ID = "id"; - TestMatch Match = matchCode(Code, returnStmt().bind(ID)); - EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("return 3;")); + TestMatch Match = matchCode(Code, returnStmt().bind("id")); + EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("return 3;")); } TEST(RangeSelectorTest, NodeOpExpression) { StringRef Code = "int f() { return 3; }"; - const char *ID = "id"; - TestMatch Match = matchCode(Code, expr().bind(ID)); - EXPECT_THAT_EXPECTED(select(node(ID), Match), HasValue("3")); + TestMatch Match = matchCode(Code, expr().bind("id")); + EXPECT_THAT_EXPECTED(select(node("id"), Match), HasValue("3")); } TEST(RangeSelectorTest, StatementOp) { StringRef Code = "int f() { return 3; }"; - const char *ID = "id"; - TestMatch Match = matchCode(Code, expr().bind(ID)); - EXPECT_THAT_EXPECTED(select(statement(ID), Match), HasValue("3;")); + TestMatch Match = matchCode(Code, expr().bind("id")); + RangeSelector R = statement("id"); + EXPECT_THAT_EXPECTED(select(R, Match), HasValue("3;")); +} + +TEST(RangeSelectorTest, StatementOpParsed) { + StringRef Code = "int f() { return 3; }"; + TestMatch Match = matchCode(Code, expr().bind("id")); + auto R = parseRangeSelector("statement(id)"); + ASSERT_THAT_EXPECTED(R, llvm::Succeeded()); + EXPECT_THAT_EXPECTED(select(*R, Match), HasValue("3;")); } 
TEST(RangeSelectorTest, MemberOp) {