diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -46,7 +46,7 @@ LRTable Table; // Binding extension ids to corresponding implementations. - llvm::DenseMap Guards; + llvm::DenseMap Guards; llvm::DenseMap RecoveryStrategies; // FIXME: add clang::LangOptions. diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h @@ -28,12 +28,12 @@ // ), and an extension point corresponds to a piece of native code. For // example, C++ grammar has a rule: // -// contextual-override := IDENTIFIER [guard=Override] +// compound_statement := { statement-seq [recover=Brackets] } // -// GLR parser only conducts the reduction of the rule if the IDENTIFIER -// content is `override`. This Override guard is implemented in CXX.cpp by -// binding the ExtensionID for the `Override` value to a specific C++ function -// that performs the check. +// The `recover` attribute instructs the parser that we should perform error +// recovery if parsing the statement-seq fails. The `Brackets` recovery +// heuristic is implemented in CXX.cpp by binding the ExtensionID for the +// `Brackets` value to a specific C++ function that finds the recovery point. // // Notions about the BNF grammar: // - "_" is the start symbol of the augmented grammar; @@ -118,11 +118,8 @@ uint8_t Size : SizeBits; // Size of the Sequence SymbolID Sequence[MaxElements]; - // A guard extension controls whether a reduction of a rule will be conducted - // by the GLR parser. - // 0 is sentinel unset extension ID, indicating there is no guard extension - // being set for this rule. 
- ExtensionID Guard = 0; + // A guarded rule has extra logic to determine whether the RHS is eligible. + bool Guarded = false; // Specifies the index within Sequence eligible for error recovery. // Given stmt := { stmt-seq_opt }, if we fail to parse the stmt-seq then we @@ -136,7 +133,7 @@ return llvm::ArrayRef(Sequence, Size); } friend bool operator==(const Rule &L, const Rule &R) { - return L.Target == R.Target && L.seq() == R.seq() && L.Guard == R.Guard; + return L.Target == R.Target && L.seq() == R.seq() && L.Guarded == R.Guarded; } }; diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -416,11 +416,11 @@ } private: - bool canReduce(ExtensionID GuardID, RuleID RID, + bool canReduce(const Rule &R, RuleID RID, llvm::ArrayRef RHS) const { - if (!GuardID) + if (!R.Guarded) return true; - if (auto Guard = Lang.Guards.lookup(GuardID)) + if (auto Guard = Lang.Guards.lookup(RID)) return Guard(RHS, Params.Code); LLVM_DEBUG(llvm::dbgs() << llvm::formatv("missing guard implementation for rule {0}\n", @@ -441,7 +441,7 @@ for (const auto *B : N->parents()) llvm::dbgs() << " --> base at S" << B->State << "\n"; }); - if (!canReduce(Rule.Guard, RID, TempSequence)) + if (!canReduce(Rule, RID, TempSequence)) return; // Copy the chain to stable storage so it can be enqueued. 
if (SequenceStorageCount == SequenceStorage.size()) @@ -572,7 +572,7 @@ TempSequence[Rule.Size - 1 - I] = Base->Payload; Base = Base->parents().front(); } - if (!canReduce(Rule.Guard, *RID, TempSequence)) + if (!canReduce(Rule, *RID, TempSequence)) return true; // reduction is not available const ForestNode *Parsed = &Params.Forest.createSequence(Rule.Target, *RID, TempSequence); diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -11,6 +11,9 @@ #include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" +#include "clang/Basic/CharInfo.h" +#include "clang/Basic/TokenKinds.h" +#include "llvm/ADT/StringSwitch.h" #include namespace clang { @@ -21,29 +24,88 @@ #include "CXXBNF.inc" ; -bool guardOverride(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override"; +// User-defined string literals look like `""suffix`. +bool isStringUserDefined(const Token &Tok) { + return !Tok.text().endswith("\""); } -bool guardFinal(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final"; -} -bool guardModule(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "module"; +bool isCharUserDefined(const Token &Tok) { return !Tok.text().endswith("'"); } + +// Combinable flags describing numbers. +// Clang has just one numeric_constant token kind, the grammar has 4. +enum NumericKind { + Integer = 0, + Floating = 1 << 0, + UserDefined = 1 << 1, +}; +// Determine the kind of numeric_constant we have. 
+// We can assume it's something valid, as it has been lexed. +// FIXME: is this expensive enough that we should set flags on the token +// and reuse them rather than computing it for each guard? +unsigned numKind(const Token &Tok) { + assert(Tok.Kind == tok::numeric_constant); + llvm::StringRef Text = Tok.text(); + if (Text.size() <= 1) + return Integer; + bool Hex = + Text.size() > 2 && Text[0] == '0' && (Text[1] == 'x' || Text[1] == 'X'); + uint8_t K = Integer; + + for (char C : Text) { + switch (C) { + case '.': + K |= Floating; + break; + case 'e': + case 'E': + if (!Hex) + K |= Floating; + break; + case 'p': + case 'P': + if (Hex) + K |= Floating; + break; + case '_': + K |= UserDefined; + break; + default: + break; + } + } + + // We would be done here, but there are stdlib UDLs that lack _. + // We must distinguish these from the builtin suffixes. + unsigned LastLetter = Text.size(); + while (LastLetter > 0 && isLetter(Text[LastLetter - 1])) + --LastLetter; + if (LastLetter == Text.size()) // Common case + return NumericKind(K); + // Trailing d/e/f are not part of the suffix in hex numbers. + while (Hex && LastLetter < Text.size() && isHexDigit(Text[LastLetter])) + ++LastLetter; + return llvm::StringSwitch(Text.substr(LastLetter)) + // std::chrono + .Cases("h", "min", "s", "ms", "us", "ns", "d", "y", K | UserDefined) + // complex + .Cases("il", "i", "if", K | UserDefined) + .Default(K); } -bool guardImport(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "import"; + +// RHS is expected to contain a single terminal. +// Returns the corresponding token. 
+const Token &onlyToken(tok::TokenKind Kind, + const ArrayRef RHS, + const TokenStream &Tokens) { + assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(Kind)); + return Tokens.tokens()[RHS.front()->startTokenIndex()]; } -bool guardExport(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "export"; +// RHS is expected to contain a single symbol. +// Returns the corresponding ForestNode. +const ForestNode &onlySymbol(SymbolID Kind, + const ArrayRef RHS, + const TokenStream &Tokens) { + assert(RHS.size() == 1 && RHS.front()->symbol() == Kind); + return *RHS.front(); } bool isFunctionDeclarator(const ForestNode *Declarator) { @@ -93,29 +155,92 @@ } llvm_unreachable("unreachable"); } -bool guardFunction(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator)); - return isFunctionDeclarator(RHS.front()); -} -bool guardNonFunction(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator)); - return !isFunctionDeclarator(RHS.front()); -} llvm::DenseMap buildGuards() { +#define TOKEN_GUARD(kind, cond) \ + [](llvm::ArrayRef RHS, const TokenStream &Tokens) { \ + const Token &Tok = onlyToken(tok::kind, RHS, Tokens); \ + return cond; \ + } +#define SYMBOL_GUARD(kind, cond) \ + [](llvm::ArrayRef RHS, const TokenStream &Tokens) { \ + const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens); \ + return cond; \ + } return { - {(ExtensionID)Extension::Override, guardOverride}, - {(ExtensionID)Extension::Final, guardFinal}, - {(ExtensionID)Extension::Import, guardImport}, - {(ExtensionID)Extension::Export, guardExport}, - {(ExtensionID)Extension::Module, guardModule}, - {(ExtensionID)Extension::FunctionDeclarator, guardFunction}, - {(ExtensionID)Extension::NonFunctionDeclarator, guardNonFunction}, + 
{(RuleID)Rule::function_declarator_0declarator, + SYMBOL_GUARD(declarator, isFunctionDeclarator(&N))}, + {(RuleID)Rule::non_function_declarator_0declarator, + SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))}, + + {(RuleID)Rule::contextual_override_0identifier, + TOKEN_GUARD(identifier, Tok.text() == "override")}, + {(RuleID)Rule::contextual_final_0identifier, + TOKEN_GUARD(identifier, Tok.text() == "final")}, + {(RuleID)Rule::import_keyword_0identifier, + TOKEN_GUARD(identifier, Tok.text() == "import")}, + {(RuleID)Rule::export_keyword_0identifier, + TOKEN_GUARD(identifier, Tok.text() == "export")}, + {(RuleID)Rule::module_keyword_0identifier, + TOKEN_GUARD(identifier, Tok.text() == "module")}, + {(RuleID)Rule::contextual_zero_0numeric_constant, + TOKEN_GUARD(numeric_constant, Tok.text() == "0")}, + + // The grammar distinguishes (only) user-defined vs plain string literals, + // where the clang lexer distinguishes (only) encoding types. + {(RuleID)Rule::user_defined_string_literal_chunk_0string_literal, + TOKEN_GUARD(string_literal, isStringUserDefined(Tok))}, + {(RuleID)Rule::user_defined_string_literal_chunk_0utf8_string_literal, + TOKEN_GUARD(utf8_string_literal, isStringUserDefined(Tok))}, + {(RuleID)Rule::user_defined_string_literal_chunk_0utf16_string_literal, + TOKEN_GUARD(utf16_string_literal, isStringUserDefined(Tok))}, + {(RuleID)Rule::user_defined_string_literal_chunk_0utf32_string_literal, + TOKEN_GUARD(utf32_string_literal, isStringUserDefined(Tok))}, + {(RuleID)Rule::user_defined_string_literal_chunk_0wide_string_literal, + TOKEN_GUARD(wide_string_literal, isStringUserDefined(Tok))}, + {(RuleID)Rule::string_literal_chunk_0string_literal, + TOKEN_GUARD(string_literal, !isStringUserDefined(Tok))}, + {(RuleID)Rule::string_literal_chunk_0utf8_string_literal, + TOKEN_GUARD(utf8_string_literal, !isStringUserDefined(Tok))}, + {(RuleID)Rule::string_literal_chunk_0utf16_string_literal, + TOKEN_GUARD(utf16_string_literal, !isStringUserDefined(Tok))}, + 
{(RuleID)Rule::string_literal_chunk_0utf32_string_literal, + TOKEN_GUARD(utf32_string_literal, !isStringUserDefined(Tok))}, + {(RuleID)Rule::string_literal_chunk_0wide_string_literal, + TOKEN_GUARD(wide_string_literal, !isStringUserDefined(Tok))}, + // And the same for chars. + {(RuleID)Rule::user_defined_character_literal_0char_constant, + TOKEN_GUARD(char_constant, isCharUserDefined(Tok))}, + {(RuleID)Rule::user_defined_character_literal_0utf8_char_constant, + TOKEN_GUARD(utf8_char_constant, isCharUserDefined(Tok))}, + {(RuleID)Rule::user_defined_character_literal_0utf16_char_constant, + TOKEN_GUARD(utf16_char_constant, isCharUserDefined(Tok))}, + {(RuleID)Rule::user_defined_character_literal_0utf32_char_constant, + TOKEN_GUARD(utf32_char_constant, isCharUserDefined(Tok))}, + {(RuleID)Rule::user_defined_character_literal_0wide_char_constant, + TOKEN_GUARD(wide_char_constant, isCharUserDefined(Tok))}, + {(RuleID)Rule::character_literal_0char_constant, + TOKEN_GUARD(char_constant, !isCharUserDefined(Tok))}, + {(RuleID)Rule::character_literal_0utf8_char_constant, + TOKEN_GUARD(utf8_char_constant, !isCharUserDefined(Tok))}, + {(RuleID)Rule::character_literal_0utf16_char_constant, + TOKEN_GUARD(utf16_char_constant, !isCharUserDefined(Tok))}, + {(RuleID)Rule::character_literal_0utf32_char_constant, + TOKEN_GUARD(utf32_char_constant, !isCharUserDefined(Tok))}, + {(RuleID)Rule::character_literal_0wide_char_constant, + TOKEN_GUARD(wide_char_constant, !isCharUserDefined(Tok))}, + // clang just has one NUMERIC_CONSTANT token for {ud,plain}x{float,int} + {(RuleID)Rule::user_defined_integer_literal_0numeric_constant, + TOKEN_GUARD(numeric_constant, numKind(Tok) == (Integer | UserDefined))}, + {(RuleID)Rule::user_defined_floating_point_literal_0numeric_constant, + TOKEN_GUARD(numeric_constant, numKind(Tok) == (Floating | UserDefined))}, + {(RuleID)Rule::integer_literal_0numeric_constant, + TOKEN_GUARD(numeric_constant, numKind(Tok) == Integer)}, + 
{(RuleID)Rule::floating_point_literal_0numeric_constant, + TOKEN_GUARD(numeric_constant, numKind(Tok) == Floating)}, }; +#undef TOKEN_GUARD +#undef SYMBOL_GUARD } Token::Index recoverBrackets(Token::Index Begin, const TokenStream &Tokens) { diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -413,8 +413,8 @@ #! to eliminate these false parses. init-declarator := non-function-declarator initializer_opt init-declarator := function-declarator requires-clause_opt -function-declarator := declarator [guard=FunctionDeclarator] -non-function-declarator := declarator [guard=NonFunctionDeclarator] +function-declarator := declarator [guard] +non-function-declarator := declarator [guard] declarator := ptr-declarator declarator := noptr-declarator parameters-and-qualifiers trailing-return-type ptr-declarator := noptr-declarator @@ -715,18 +715,18 @@ literal := boolean-literal literal := pointer-literal literal := user-defined-literal -integer-literal := NUMERIC_CONSTANT -character-literal := CHAR_CONSTANT -character-literal := WIDE_CHAR_CONSTANT -character-literal := UTF8_CHAR_CONSTANT -character-literal := UTF16_CHAR_CONSTANT -character-literal := UTF32_CHAR_CONSTANT -floating-point-literal := NUMERIC_CONSTANT -string-literal-chunk := STRING_LITERAL -string-literal-chunk := WIDE_STRING_LITERAL -string-literal-chunk := UTF8_STRING_LITERAL -string-literal-chunk := UTF16_STRING_LITERAL -string-literal-chunk := UTF32_STRING_LITERAL +integer-literal := NUMERIC_CONSTANT [guard] +character-literal := CHAR_CONSTANT [guard] +character-literal := WIDE_CHAR_CONSTANT [guard] +character-literal := UTF8_CHAR_CONSTANT [guard] +character-literal := UTF16_CHAR_CONSTANT [guard] +character-literal := UTF32_CHAR_CONSTANT [guard] +floating-point-literal := NUMERIC_CONSTANT [guard] +string-literal-chunk := STRING_LITERAL [guard] +string-literal-chunk := 
WIDE_STRING_LITERAL [guard] +string-literal-chunk := UTF8_STRING_LITERAL [guard] +string-literal-chunk := UTF16_STRING_LITERAL [guard] +string-literal-chunk := UTF32_STRING_LITERAL [guard] #! Technically, string concatenation happens at phase 6 which is before parsing, #! so it doesn't belong to the grammar. However, we extend the grammar to #! support it, to make the pseudoparser fully functional on practical code. @@ -736,33 +736,33 @@ user-defined-literal := user-defined-floating-point-literal user-defined-literal := user-defined-string-literal user-defined-literal := user-defined-character-literal -user-defined-integer-literal := NUMERIC_CONSTANT -user-defined-string-literal-chunk := STRING_LITERAL -user-defined-string-literal-chunk := WIDE_STRING_LITERAL -user-defined-string-literal-chunk := UTF8_STRING_LITERAL -user-defined-string-literal-chunk := UTF16_STRING_LITERAL -user-defined-string-literal-chunk := UTF32_STRING_LITERAL +user-defined-integer-literal := NUMERIC_CONSTANT [guard] +user-defined-string-literal-chunk := STRING_LITERAL [guard] +user-defined-string-literal-chunk := WIDE_STRING_LITERAL [guard] +user-defined-string-literal-chunk := UTF8_STRING_LITERAL [guard] +user-defined-string-literal-chunk := UTF16_STRING_LITERAL [guard] +user-defined-string-literal-chunk := UTF32_STRING_LITERAL [guard] user-defined-string-literal := user-defined-string-literal-chunk user-defined-string-literal := string-literal-chunk user-defined-string-literal user-defined-string-literal := user-defined-string-literal string-literal-chunk -user-defined-floating-point-literal := NUMERIC_CONSTANT -user-defined-character-literal := CHAR_CONSTANT -user-defined-character-literal := WIDE_CHAR_CONSTANT -user-defined-character-literal := UTF8_CHAR_CONSTANT -user-defined-character-literal := UTF16_CHAR_CONSTANT -user-defined-character-literal := UTF32_CHAR_CONSTANT +user-defined-floating-point-literal := NUMERIC_CONSTANT [guard] +user-defined-character-literal := CHAR_CONSTANT 
[guard] +user-defined-character-literal := WIDE_CHAR_CONSTANT [guard] +user-defined-character-literal := UTF8_CHAR_CONSTANT [guard] +user-defined-character-literal := UTF16_CHAR_CONSTANT [guard] +user-defined-character-literal := UTF32_CHAR_CONSTANT [guard] boolean-literal := FALSE boolean-literal := TRUE pointer-literal := NULLPTR #! Contextual keywords -- clang lexer always lexes them as identifier tokens. #! Placeholders for literal text in the grammar that lex as other things. -contextual-override := IDENTIFIER [guard=Override] -contextual-final := IDENTIFIER [guard=Final] -contextual-zero := NUMERIC_CONSTANT [guard=Zero] -module-keyword := IDENTIFIER [guard=Module] -import-keyword := IDENTIFIER [guard=Import] -export-keyword := IDENTIFIER [guard=Export] +contextual-override := IDENTIFIER [guard] +contextual-final := IDENTIFIER [guard] +contextual-zero := NUMERIC_CONSTANT [guard] +module-keyword := IDENTIFIER [guard] +import-keyword := IDENTIFIER [guard] +export-keyword := IDENTIFIER [guard] #! greatergreater token -- clang lexer always lexes it as a single token, we #! split it into two tokens to make the GLR parser aware of the nested-template diff --git a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp --- a/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/Grammar.cpp @@ -86,8 +86,8 @@ if (R.RecoveryIndex == I) OS << " [recover=" << T->AttributeValues[R.Recovery] << "]"; } - if (R.Guard) - OS << " [guard=" << T->AttributeValues[R.Guard] << "]"; + if (R.Guarded) + OS << " [guard]"; return Result; } diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp --- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp @@ -76,6 +76,7 @@ }); // Add an empty string for the corresponding sentinel unset attribute. 
T->AttributeValues.push_back(""); + UniqueAttributeValues.erase(""); llvm::for_each(UniqueAttributeValues, [&T](llvm::StringRef Name) { T->AttributeValues.emplace_back(); T->AttributeValues.back() = Name.str(); @@ -258,7 +259,7 @@ for (unsigned I = 0; I < Spec.Sequence.size(); ++I) { for (const auto &KV : Spec.Sequence[I].Attributes) { if (KV.first == "guard") { - R.Guard = LookupExtensionID(KV.second); + R.Guarded = true; } else if (KV.first == "recover") { R.Recovery = LookupExtensionID(KV.second); R.RecoveryIndex = I; diff --git a/clang-tools-extra/pseudo/test/cxx/literals.cpp b/clang-tools-extra/pseudo/test/cxx/literals.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/test/cxx/literals.cpp @@ -0,0 +1,43 @@ +// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest -forest-abbrev=0 | FileCheck %s --implicit-check-not=ambiguous +auto list = { + 0, // CHECK: := integer-literal + 0b1011, // CHECK: := integer-literal + 0777, // CHECK: := integer-literal + 42_u, // CHECK: := user-defined-integer-literal + 0LL, // CHECK: := integer-literal + 0h, // CHECK: := user-defined-integer-literal + 0., // CHECK: := floating-point-literal + .2, // CHECK: := floating-point-literal + 2e1, // CHECK: := floating-point-literal + 0x42d, // CHECK: := integer-literal + 0x42_d, // CHECK: := user-defined-integer-literal + 0x42ds, // CHECK: := user-defined-integer-literal + 0x1.2p2,// CHECK: := floating-point-literal + + "", // CHECK: literal := string-literal + L"", // CHECK: literal := string-literal + u8"", // CHECK: literal := string-literal + u"", // CHECK: literal := string-literal + U"", // CHECK: literal := string-literal + R"()", // CHECK: literal := string-literal + uR"()", // CHECK: literal := string-literal + "a" "b", // CHECK: literal := string-literal + u8"a" "b", // CHECK: literal := string-literal + u"a" u"b", // CHECK: literal := string-literal + "a"_u "b", // CHECK: user-defined-literal := user-defined-string-literal + "a"_u u"b", // CHECK: 
user-defined-literal := user-defined-string-literal + R"(a)" "\n", // CHECK: literal := string-literal + R"c(a)c"_u u"\n", // CHECK: user-defined-literal := user-defined-string-literal + + 'a', // CHECK: := character-literal + 'abc', // CHECK: := character-literal + 'abcdef', // CHECK: := character-literal + u'a', // CHECK: := character-literal + U'a', // CHECK: := character-literal + L'a', // CHECK: := character-literal + L'abc', // CHECK: := character-literal + U'\u1234',// CHECK: := character-literal + '\u1234', // CHECK: := character-literal + u'a'_u, // CHECK: := user-defined-character-literal +}; + diff --git a/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp b/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp --- a/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp +++ b/clang-tools-extra/pseudo/test/cxx/mixed-designator.cpp @@ -5,16 +5,16 @@ // CHECK-NEXT: ├─{ := tok[3] // CHECK-NEXT: ├─initializer-list // CHECK-NEXT: │ ├─initializer-list -// CHECK-NEXT: │ │ ├─initializer-list~literal -// CHECK: │ │ ├─, := tok[5] +// CHECK-NEXT: │ │ ├─initializer-list~NUMERIC_CONSTANT +// CHECK-NEXT: │ │ ├─, := tok[5] // CHECK-NEXT: │ │ └─initializer-list-item // CHECK-NEXT: │ │ ├─designator // CHECK-NEXT: │ │ │ ├─. 
:= tok[6] // CHECK-NEXT: │ │ │ └─IDENTIFIER := tok[7] // CHECK-NEXT: │ │ └─brace-or-equal-initializer // CHECK-NEXT: │ │ ├─= := tok[8] -// CHECK-NEXT: │ │ └─initializer-clause~literal -// CHECK: │ ├─, := tok[10] +// CHECK-NEXT: │ │ └─initializer-clause~NUMERIC_CONSTANT +// CHECK-NEXT: │ ├─, := tok[10] // CHECK-NEXT: │ └─initializer-list-item // CHECK-NEXT: │ ├─designator // CHECK-NEXT: │ │ ├─[ := tok[11] @@ -22,6 +22,6 @@ // CHECK-NEXT: │ │ └─] := tok[13] // CHECK-NEXT: │ └─brace-or-equal-initializer~braced-init-list // CHECK-NEXT: │ ├─{ := tok[14] -// CHECK-NEXT: │ ├─initializer-list~literal +// CHECK-NEXT: │ ├─initializer-list~NUMERIC_CONSTANT // CHECK: │ └─} := tok[16] // CHECK-NEXT: └─} := tok[17] diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp --- a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp +++ b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp @@ -45,6 +45,8 @@ desc("Strip directives and select conditional sections")); static opt PrintStatistics("print-statistics", desc("Print GLR parser statistics")); static opt PrintForest("print-forest", desc("Print parse forest")); +static opt ForestAbbrev("forest-abbrev", desc("Abbreviate parse forest"), + init(true)); static opt HTMLForest("html-forest", desc("output file for HTML forest")); static opt StartSymbol("start-symbol", @@ -153,7 +155,7 @@ glrParse(clang::pseudo::ParseParams{*ParseableStream, Arena, GSS}, *StartSymID, Lang); if (PrintForest) - llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/true); + llvm::outs() << Root.dumpRecursive(Lang.G, /*Abbreviated=*/ForestAbbrev); if (HTMLForest.getNumOccurrences()) { std::error_code EC; diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -631,10 +631,10 @@ build(R"bnf( _ := start - start := IDENTIFIER [guard=TestOnly] + start := 
IDENTIFIER [guard] )bnf"); TestLang.Guards.try_emplace( - extensionID("TestOnly"), + ruleFor("start"), [&](llvm::ArrayRef RHS, const TokenStream &Tokens) { assert(RHS.size() == 1 && RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); @@ -647,7 +647,7 @@ const TokenStream &Succeeded = cook(lex(Input, LOptions), LOptions); EXPECT_EQ(glrParse({Succeeded, Arena, GSStack}, id("start"), TestLang) .dumpRecursive(TestLang.G), - "[ 0, end) start := IDENTIFIER [guard=TestOnly]\n" + "[ 0, end) start := IDENTIFIER [guard]\n" "[ 0, end) └─IDENTIFIER := tok[0]\n"); Input = "notest"; diff --git a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp --- a/clang-tools-extra/pseudo/unittests/GrammarTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GrammarTest.cpp @@ -102,16 +102,11 @@ TEST_F(GrammarTest, Annotation) { build(R"bnf( _ := x - - x := y [guard=value] - y := IDENTIFIER [guard=final] - + x := IDENTIFIER [guard] )bnf"); - ASSERT_TRUE(Diags.empty()); - EXPECT_EQ(G.lookupRule(ruleFor("_")).Guard, 0); - EXPECT_GT(G.lookupRule(ruleFor("x")).Guard, 0); - EXPECT_GT(G.lookupRule(ruleFor("y")).Guard, 0); - EXPECT_NE(G.lookupRule(ruleFor("x")).Guard, G.lookupRule(ruleFor("y")).Guard); + ASSERT_THAT(Diags, IsEmpty()); + EXPECT_FALSE(G.lookupRule(ruleFor("_")).Guarded); + EXPECT_TRUE(G.lookupRule(ruleFor("x")).Guarded); } TEST_F(GrammarTest, MangleName) {