diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/Token.h b/clang/include/clang/Tooling/Syntax/Pseudo/Token.h
--- a/clang/include/clang/Tooling/Syntax/Pseudo/Token.h
+++ b/clang/include/clang/Tooling/Syntax/Pseudo/Token.h
@@ -181,7 +181,8 @@
   NeedsCleaning = 1 << 1,
 };
 
-/// Derives a token stream by decoding escapes and interpreting raw_identifiers.
+/// Derives a token stream by decoding escapes, interpreting raw_identifiers,
+/// and splitting the greatergreater token.
 ///
 /// Tokens containing UCNs, escaped newlines, trigraphs etc are decoded and
 /// their backing data is owned by the returned stream.
diff --git a/clang/lib/Tooling/Syntax/Pseudo/Lex.cpp b/clang/lib/Tooling/Syntax/Pseudo/Lex.cpp
--- a/clang/lib/Tooling/Syntax/Pseudo/Lex.cpp
+++ b/clang/lib/Tooling/Syntax/Pseudo/Lex.cpp
@@ -99,10 +99,21 @@
       Tok.Length = Text.size();
       Tok.Flags &= ~static_cast<decltype(Tok.Flags)>(LexFlags::NeedsCleaning);
     }
-    // Cook raw_identifiers into identifier, keyword, etc.
-    if (Tok.Kind == tok::raw_identifier)
+
+    if (Tok.Kind == tok::raw_identifier) {
+      // Cook raw_identifiers into identifier, keyword, etc.
       Tok.Kind = Identifiers.get(Tok.text()).getTokenID();
-    Result.push(std::move(Tok));
+    } else if (Tok.Kind == tok::greatergreater) {
+      // Split the greatergreater token.
+      // FIXME: split lessless token to support Cuda triple angle brackets <<<.
+      assert(Tok.text() == ">>");
+      Tok.Kind = tok::greater;
+      Tok.Length = 1;
+      Result.push(Tok);
+      // Line is wrong if the first greater is followed by an escaped newline!
+      Tok.Data = Tok.text().data() + 1;
+    }
+    Result.push(std::move(Tok));
   }
 
   Result.finalize();
diff --git a/clang/lib/Tooling/Syntax/Pseudo/Token.cpp b/clang/lib/Tooling/Syntax/Pseudo/Token.cpp
--- a/clang/lib/Tooling/Syntax/Pseudo/Token.cpp
+++ b/clang/lib/Tooling/Syntax/Pseudo/Token.cpp
@@ -7,6 +7,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Tooling/Syntax/Pseudo/Token.h"
+#include "clang/Basic/TokenKinds.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
diff --git a/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
--- a/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
+++ b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf
@@ -13,6 +13,9 @@
 # - the file merely describes the core C++ grammar. Preprocessor directives and
 #   lexical conversions are omitted as we reuse clang's lexer and run a fake
 #   preprocessor;
+# - grammar rules with the >> token are adjusted: the greatergreater token is
+#   split into two > tokens, to make the GLR parser aware of both nested
+#   templates and the right-shift operator.
 #
 # Guidelines:
 #   - non-terminals are lower_case; terminals (aka tokens) correspond to
@@ -96,7 +99,7 @@
 fold-operator := &
 fold-operator := |
 fold-operator := <<
-fold-operator := >>
+fold-operator := greatergreater
 fold-operator := +=
 fold-operator := -=
 fold-operator := *=
@@ -202,7 +205,7 @@
 # expr.shift
 shift-expression := additive-expression
 shift-expression := shift-expression << additive-expression
-shift-expression := shift-expression >> additive-expression
+shift-expression := shift-expression greatergreater additive-expression
 # expr.spaceship
 compare-expression := shift-expression
 compare-expression := compare-expression <=> shift-expression
@@ -615,7 +618,7 @@
 operator-name := &&
 operator-name := ||
 operator-name := <<
-operator-name := >>
+operator-name := greatergreater
 operator-name := <<=
 operator-name := >>=
 operator-name := ++
@@ -737,3 +740,8 @@
 module-keyword := IDENTIFIER
 import-keyword := IDENTIFIER
 export-keyword := IDENTIFIER
+
+#! greatergreater token -- clang's lexer always lexes ">>" as a single token;
+#! we split it into two tokens so the GLR parser can handle the nested-template
+#! case.
+greatergreater := > >
diff --git a/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp b/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
--- a/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
+++ b/clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp
@@ -172,6 +172,25 @@
   }));
 }
 
+TEST(TokenTest, SplitGreaterGreater) {
+  LangOptions Opts;
+  std::string Code = R"cpp(
+>> // split
+// >> with an escaped newline in the middle, split
+>\
+>
+>>= // not split
+)cpp";
+  TokenStream Split = stripComments(cook(lex(Code, Opts), Opts));
+  EXPECT_THAT(Split.tokens(), ElementsAreArray({
+                                  token(">", tok::greater),
+                                  token(">", tok::greater),
+                                  token(">", tok::greater),
+                                  token(">", tok::greater),
+                                  token(">>=", tok::greatergreaterequal),
+                              }));
+}
+
 TEST(TokenTest, DropComments) {
   LangOptions Opts;
   std::string Code = R"cpp(
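
Editorial note (illustration only, not part of the patch): the sketch below shows how a caller of the lex()/cook() entry points touched above would observe the split. The clang::syntax::pseudo namespace qualification and the extra includes are assumptions inferred from the file paths in this diff; the lex/cook/tokens/text calls mirror the unit test.

  // Hedged sketch: assumes lex() and cook() are declared in Token.h (as the
  // doc-comment hunk above suggests) and live in clang::syntax::pseudo.
  #include "clang/Basic/LangOptions.h"
  #include "clang/Tooling/Syntax/Pseudo/Token.h"
  #include "llvm/Support/raw_ostream.h"
  #include <string>

  void demoGreaterGreaterSplit() {
    clang::LangOptions Opts;
    // One ">>" closes two template argument lists, the other is a right shift;
    // clang's lexer emits a single greatergreater token for both.
    std::string Code = "std::vector<std::vector<int>> V; int X = 256 >> 2;";
    auto Cooked = clang::syntax::pseudo::cook(
        clang::syntax::pseudo::lex(Code, Opts), Opts);
    // After cook(), each ">>" is two adjacent tok::greater tokens; the grammar
    // rule `greatergreater := > >` lets the GLR parser reassemble a right shift
    // where that reading applies, while nested templates consume one > each.
    for (const auto &Tok : Cooked.tokens())
      llvm::errs() << Tok.text() << " ";
    llvm::errs() << "\n";
  }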