diff --git a/clang/include/clang/Tooling/Syntax/Nodes.h b/clang/include/clang/Tooling/Syntax/Nodes.h --- a/clang/include/clang/Tooling/Syntax/Nodes.h +++ b/clang/include/clang/Tooling/Syntax/Nodes.h @@ -50,6 +50,11 @@ StringLiteralExpression, BoolLiteralExpression, CxxNullPtrExpression, + UnknownUserDefinedLiteralExpression, + IntegerUserDefinedLiteralExpression, + FloatUserDefinedLiteralExpression, + CharUserDefinedLiteralExpression, + StringUserDefinedLiteralExpression, IdExpression, // Statements. @@ -325,6 +330,88 @@ syntax::Leaf *nullPtrKeyword(); }; +/// Expression for user-defined literal. C++ [lex.ext] +/// user-defined-literal: +/// user-defined-integer-literal +/// user-defined-floating-point-literal +/// user-defined-string-literal +/// user-defined-character-literal +class UserDefinedLiteralExpression : public Expression { +public: + UserDefinedLiteralExpression(NodeKind K) : Expression(K) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression || + N->kind() == NodeKind::IntegerUserDefinedLiteralExpression || + N->kind() == NodeKind::FloatUserDefinedLiteralExpression || + N->kind() == NodeKind::CharUserDefinedLiteralExpression || + N->kind() == NodeKind::StringUserDefinedLiteralExpression; + } + syntax::Leaf *literalToken(); +}; + +// We cannot yet distinguish between user-defined-integer-literal and +// user-defined-floating-point-literal, when using raw literal operator or +// numeric literal operator. C++ [lex.ext]p3, p4 +/// Expression for an unknown user-defined-literal. +class UnknownUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + UnknownUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::UnknownUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::UnknownUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-integer-literal. C++ [lex.ext] +class IntegerUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + IntegerUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::IntegerUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::IntegerUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-floating-point-literal. C++ [lex.ext] +class FloatUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + FloatUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::FloatUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::FloatUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-character-literal. C++ [lex.ext] +class CharUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + CharUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::CharUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::CharUserDefinedLiteralExpression; + } +}; + +/// Expression for user-defined-string-literal. C++ [lex.ext] +class StringUserDefinedLiteralExpression final + : public UserDefinedLiteralExpression { +public: + StringUserDefinedLiteralExpression() + : UserDefinedLiteralExpression( + NodeKind::StringUserDefinedLiteralExpression) {} + static bool classof(const Node *N) { + return N->kind() == NodeKind::StringUserDefinedLiteralExpression; + } +}; + /// An abstract class for prefix and postfix unary operators. class UnaryOperatorExpression : public Expression { public: diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -215,7 +215,8 @@ } if (Initializer.isValid()) { auto InitializerEnd = Initializer.getEnd(); - assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || End == InitializerEnd); + assert(SM.isBeforeInTranslationUnit(End, InitializerEnd) || + End == InitializerEnd); End = InitializerEnd; } return SourceRange(Start, End); @@ -707,6 +708,40 @@ return NNS; } + bool TraverseUserDefinedLiteral(UserDefinedLiteral *S) { + // The semantic AST node `UserDefinedLiteral` (UDL) may have one child node + // referencing the location of the UDL suffix (`_w` in `1.2_w`). The + // UDL suffix location does not point to the beginning of a token, so we + // can't represent the UDL suffix as a separate syntax tree node. + + return WalkUpFromUserDefinedLiteral(S); + } + + syntax::NodeKind getUserDefinedLiteralKind(UserDefinedLiteral *S) { + switch (S->getLiteralOperatorKind()) { + case clang::UserDefinedLiteral::LOK_Integer: + return syntax::NodeKind::IntegerUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Floating: + return syntax::NodeKind::FloatUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Character: + return syntax::NodeKind::CharUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_String: + return syntax::NodeKind::StringUserDefinedLiteralExpression; + case clang::UserDefinedLiteral::LOK_Raw: + case clang::UserDefinedLiteral::LOK_Template: + return syntax::NodeKind::UnknownUserDefinedLiteralExpression; + } + } + + bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { + Builder.markChildToken(S->getBeginLoc(), syntax::NodeRole::LiteralToken); + Builder.foldNode(Builder.getExprRange(S), + new (allocator()) syntax::UserDefinedLiteralExpression( + getUserDefinedLiteralKind(S)), + S); + return true; + } + bool WalkUpFromDeclRefExpr(DeclRefExpr *S) { if (auto *NNS = BuildNestedNameSpecifier(S->getQualifierLoc())) Builder.markChild(NNS, syntax::NodeRole::IdExpression_qualifier); @@ -816,9 +851,9 @@ bool TraverseCXXOperatorCallExpr(CXXOperatorCallExpr *S) { if (getOperatorNodeKind(*S) == syntax::NodeKind::PostfixUnaryOperatorExpression) { - // A postfix unary operator is declared as taking two operands. The second - // operand is used to distinguish from its prefix counterpart. In the - // semantic AST this "phantom" operand is represented as a + // A postfix unary operator is declared as taking two operands. The + // second operand is used to distinguish from its prefix counterpart. In + // the semantic AST this "phantom" operand is represented as a // `IntegerLiteral` with invalid `SourceLocation`. We skip visiting this // operand because it does not correspond to anything written in source // code diff --git a/clang/lib/Tooling/Syntax/Nodes.cpp b/clang/lib/Tooling/Syntax/Nodes.cpp --- a/clang/lib/Tooling/Syntax/Nodes.cpp +++ b/clang/lib/Tooling/Syntax/Nodes.cpp @@ -32,6 +32,16 @@ return OS << "BoolLiteralExpression"; case NodeKind::CxxNullPtrExpression: return OS << "CxxNullPtrExpression"; + case NodeKind::UnknownUserDefinedLiteralExpression: + return OS << "UnknownUserDefinedLiteralExpression"; + case NodeKind::IntegerUserDefinedLiteralExpression: + return OS << "IntegerUserDefinedLiteralExpression"; + case NodeKind::FloatUserDefinedLiteralExpression: + return OS << "FloatUserDefinedLiteralExpression"; + case NodeKind::CharUserDefinedLiteralExpression: + return OS << "CharUserDefinedLiteralExpression"; + case NodeKind::StringUserDefinedLiteralExpression: + return OS << "StringUserDefinedLiteralExpression"; case NodeKind::PrefixUnaryOperatorExpression: return OS << "PrefixUnaryOperatorExpression"; case NodeKind::PostfixUnaryOperatorExpression: @@ -252,6 +262,11 @@ findChild(syntax::NodeRole::LiteralToken)); } +syntax::Leaf *syntax::UserDefinedLiteralExpression::literalToken() { + return llvm::cast_or_null( + findChild(syntax::NodeRole::LiteralToken)); +} + syntax::Expression *syntax::BinaryOperatorExpression::lhs() { return llvm::cast_or_null( findChild(syntax::NodeRole::BinaryOperatorExpression_leftHandSide)); diff --git a/clang/unittests/Tooling/Syntax/TreeTest.cpp b/clang/unittests/Tooling/Syntax/TreeTest.cpp --- a/clang/unittests/Tooling/Syntax/TreeTest.cpp +++ b/clang/unittests/Tooling/Syntax/TreeTest.cpp @@ -1184,20 +1184,106 @@ )txt")); } -TEST_P(SyntaxTreeTest, IntegerLiteral) { +TEST_P(SyntaxTreeTest, UserDefinedLiteral) { + if (!GetParam().isCXX11OrLater()) { + return; + } EXPECT_TRUE(treeDumpEqual( R"cpp( +unsigned operator "" _i(unsigned long long); +unsigned operator "" _f(long double); +unsigned operator "" _c(char); + +unsigned operator "" _r(const char*); // raw-literal operator + +template +unsigned operator "" _t(); // numeric literal operator template + void test() { - 12; - 12u; - 12l; - 12ul; - 014; - 0XC; + 12_i; // call: operator "" _i(12uLL) | kind: integer + 1.2_f; // call: operator "" _f(1.2L) | kind: float + '2'_c; // call: operator "" _c('2') | kind: char + + // PROBLEM: How to discover the kind of user-defined-literal from the AST? + 12_r; // call: operator "" _r("12") | kind: integer + 1.2_r; // call: operator "" _i("1.2") | kind: float + 12_t; // call: operator<'1', '2'> "" _x() | kind: integer + 1.2_t; // call: operator<'1', '2'> "" _x() | kind: float } -)cpp", + )cpp", R"txt( *: TranslationUnit +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_i +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-unsigned +| | | |-long +| | | `-long +| | `-) +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_f +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-long +| | | `-double +| | `-) +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_c +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | `-char +| | `-) +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_r +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-const +| | | |-char +| | | `-SimpleDeclarator +| | | `-* +| | `-) +| `-; +|-TemplateDeclaration +| |-template +| |-< +| |-SimpleDeclaration +| | `-char +| |-... +| |-> +| `-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_t +| | `-ParametersAndQualifiers +| | |-( +| | `-) +| `-; `-SimpleDeclaration |-void |-SimpleDeclarator @@ -1208,28 +1294,95 @@ `-CompoundStatement |-{ |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12 + | |-IntegerUserDefinedLiteralExpression + | | `-12_i | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12u + | |-FloatUserDefinedLiteralExpression + | | `-1.2_f | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12l + | |-CharUserDefinedLiteralExpression + | | `-'2'_c | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-12ul + | |-UnknownUserDefinedLiteralExpression + | | `-12_r | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-014 + | |-UnknownUserDefinedLiteralExpression + | | `-1.2_r | `-; |-ExpressionStatement - | |-IntegerLiteralExpression - | | `-0XC + | |-UnknownUserDefinedLiteralExpression + | | `-12_t + | `-; + |-ExpressionStatement + | |-UnknownUserDefinedLiteralExpression + | | `-1.2_t + | `-; + `-} +)txt")); +} + +TEST_P(SyntaxTreeTest, UserDefinedLiteralString) { + if (!GetParam().isCXX11OrLater()) { + return; + } + EXPECT_TRUE(treeDumpEqual( + R"cpp( +typedef decltype(sizeof(void *)) size_t; +unsigned operator "" _s(const char*, size_t); +void test() { + "12"_s;// call: operator "" _s("12") | kind: string +} + )cpp", + R"txt( +*: TranslationUnit +|-SimpleDeclaration +| |-typedef +| |-decltype +| |-( +| |-UnknownExpression +| | |-sizeof +| | |-( +| | |-void +| | |-* +| | `-) +| |-) +| |-SimpleDeclarator +| | `-size_t +| `-; +|-SimpleDeclaration +| |-unsigned +| |-SimpleDeclarator +| | |-operator +| | |-"" +| | |-_s +| | `-ParametersAndQualifiers +| | |-( +| | |-SimpleDeclaration +| | | |-const +| | | |-char +| | | `-SimpleDeclarator +| | | `-* +| | |-, +| | |-SimpleDeclaration +| | | `-size_t +| | `-) +| `-; +`-SimpleDeclaration + |-void + |-SimpleDeclarator + | |-test + | `-ParametersAndQualifiers + | |-( + | `-) + `-CompoundStatement + |-{ + |-ExpressionStatement + | |-StringUserDefinedLiteralExpression + | | `-"12"_s | `-; `-} )txt"));