diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/Token.h b/clang/include/clang/Tooling/Syntax/Pseudo/Token.h
--- a/clang/include/clang/Tooling/Syntax/Pseudo/Token.h
+++ b/clang/include/clang/Tooling/Syntax/Pseudo/Token.h
@@ -195,6 +195,11 @@
 /// (And having cooked token kinds in PP-disabled sections is useful for us).
 TokenStream cook(const TokenStream &, const clang::LangOptions &);
 
+/// Derives a token stream by splitting the greatergreater token.
+///
+/// Each greatergreater (>>) token is split into two greater (>) tokens.
+TokenStream splitGreaterGreater(const TokenStream &);
+
 /// Drops comment tokens.
 TokenStream stripComments(const TokenStream &);
 
diff --git a/clang/lib/Tooling/Syntax/Pseudo/Token.cpp b/clang/lib/Tooling/Syntax/Pseudo/Token.cpp
--- a/clang/lib/Tooling/Syntax/Pseudo/Token.cpp
+++ b/clang/lib/Tooling/Syntax/Pseudo/Token.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/Syntax/Pseudo/Token.h"
+#include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
@@ -93,6 +94,29 @@
   OS << '\n';
 }
 
+TokenStream splitGreaterGreater(const TokenStream &Input) {
+  TokenStream Out;
+  for (Token T : Input.tokens()) {
+    // FIXME: split the lessless token to support CUDA's triple angle brackets <<<.
+    if (T.Kind == tok::greatergreater) {
+      if (T.text() == ">>") {
+        T.Kind = tok::greater;
+        T.Length = 1;
+        Out.push(T);
+        T.Data = T.text().data() + 1;
+        // FIXME: Line is wrong if the first greater is followed by an escaped
+        // newline.
+        Out.push(T);
+        continue;
+      }
+      assert(false && "attempted to split an uncooked token stream!");
+    }
+    Out.push(std::move(T));
+  }
+  Out.finalize();
+  return Out;
+}
+
 TokenStream stripComments(const TokenStream &Input) {
   TokenStream Out;
   for (const Token &T : Input.tokens()) {
diff --git a/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
--- a/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
+++ b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
@@ -13,6 +13,9 @@
 # - the file merely describes the core C++ grammar. Preprocessor directives and
 #   lexical conversions are omitted as we reuse clang's lexer and run a fake
 #   preprocessor;
+# - grammar rules that involve the >> token are adjusted: the greatergreater
+#   token is split into two > tokens, so that the GLR parser can handle both
+#   nested templates and the right-shift operator.
 #
 # Guidelines:
 # - non-terminals are lower_case; terminals (aka tokens) correspond to
@@ -96,7 +99,7 @@
 fold-operator := ^
 fold-operator := |
 fold-operator := <<
-fold-operator := >>
+fold-operator := greatergreater
 fold-operator := +=
 fold-operator := -=
 fold-operator := *=
@@ -202,7 +205,7 @@
 # expr.shift
 shift-expression := additive-expression
 shift-expression := shift-expression << additive-expression
-shift-expression := shift-expression >> additive-expression
+shift-expression := shift-expression greatergreater additive-expression
 # expr.spaceship
 compare-expression := shift-expression
 compare-expression := compare-expression <=> shift-expression
@@ -615,7 +618,7 @@
 operator-name := ^^
 operator-name := ||
 operator-name := <<
-operator-name := >>
+operator-name := greatergreater
 operator-name := <<=
 operator-name := >>=
 operator-name := ++
@@ -737,3 +740,8 @@
 module-keyword := IDENTIFIER
 import-keyword := IDENTIFIER
 export-keyword := IDENTIFIER
+
+#! Greatergreater token -- clang lexer always lexes it as a single token; we
+#! split it into two tokens to make the GLR parser aware of the nested-template
+#! case.
+greatergreater := > >
diff --git a/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp b/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
--- a/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
+++ b/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
@@ -172,6 +172,30 @@
   }));
 }
 
+TEST(TokenTest, SplitGreaterGreater) {
+  LangOptions Opts;
+  std::string Code = R"cpp(
+>> // split
+// >> with an escaped newline in the middle, split
+>\
+>
+>>= // not split
+)cpp";
+  TokenStream Raw = stripComments(cook(lex(Code, Opts), Opts));
+  EXPECT_THAT(Raw.tokens(),
+              ElementsAreArray({token(">>", tok::greatergreater),
+                                token(">>", tok::greatergreater),
+                                token(">>=", tok::greatergreaterequal)}));
+  TokenStream Split = splitGreaterGreater(Raw);
+  EXPECT_THAT(Split.tokens(), ElementsAreArray({
+                                  token(">", tok::greater),
+                                  token(">", tok::greater),
+                                  token(">", tok::greater),
+                                  token(">", tok::greater),
+                                  token(">>=", tok::greatergreaterequal),
+                              }));
+}
+
 TEST(TokenTest, DropComments) {
   LangOptions Opts;
   std::string Code = R"cpp(