diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -14,7 +14,9 @@ #include "clang/Basic/CharInfo.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/Support/Debug.h" #include +#define DEBUG_TYPE "CXX.cpp" namespace clang { namespace pseudo { @@ -160,7 +162,110 @@ return symbolToToken(P.Lookahead) != tok::kw_else; } +// Whether this e.g. decl-specifier contains an "exclusive" type such as a class +// name, and thus can't combine with a second exclusive type. +// +// Returns false for +// - non-types +// - "unsigned" etc that may suffice as types but may modify others +// - cases of uncertainty (e.g. due to ambiguity) +bool hasExclusiveType(const ForestNode *N) { + // FIXME: every time we apply this check, we walk the whole subtree. + // Add per-node caching instead. + while (true) { + assert(N->symbol() == (SymbolID)Symbol::decl_specifier_seq || + N->symbol() == (SymbolID)Symbol::type_specifier_seq || + N->symbol() == (SymbolID)Symbol::defining_type_specifier_seq || + N->symbol() == (SymbolID)Symbol::decl_specifier || + N->symbol() == (SymbolID)Symbol::type_specifier || + N->symbol() == (SymbolID)Symbol::defining_type_specifier || + N->symbol() == (SymbolID)Symbol::simple_type_specifier); + if (N->kind() == ForestNode::Opaque) + return false; // conservatively. + if (N->kind() == ForestNode::Ambiguous) { + // Conservatively, this has a type if all options do. + for (const auto* Alt : N->alternatives()) + if (!hasExclusiveType(Alt)) + return false; + return true; + } + // All supported symbols are nonterminals. + assert(N->kind() == ForestNode::Sequence); + switch (N->rule()) { + // seq := element seq: check element then continue into seq + case (RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq: + case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq: + case (RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq: + if (hasExclusiveType(N->children().front())) + return true; + N = N->children().back(); + continue; + // seq := element: continue into element + case (RuleID)Rule::decl_specifier_seq_0decl_specifier: + case (RuleID)Rule::type_specifier_seq_0type_specifier: + case (RuleID)Rule::defining_type_specifier_seq_0defining_type_specifier: + N = N->children().front(); + continue; + + // defining-type-specifier + case (RuleID)Rule::defining_type_specifier_0type_specifier: + N = N->children().front(); + continue; + case (RuleID)Rule::defining_type_specifier_0class_specifier: + case (RuleID)Rule::defining_type_specifier_0enum_specifier: + return true; + + // decl-specifier + case (RuleID)Rule::decl_specifier_0defining_type_specifier: + N = N->children().front(); + continue; + case (RuleID)Rule::decl_specifier_0consteval: + case (RuleID)Rule::decl_specifier_0constexpr: + case (RuleID)Rule::decl_specifier_0constinit: + case (RuleID)Rule::decl_specifier_0inline: + case (RuleID)Rule::decl_specifier_0friend: + case (RuleID)Rule::decl_specifier_0storage_class_specifier: + case (RuleID)Rule::decl_specifier_0typedef: + case (RuleID)Rule::decl_specifier_0function_specifier: + return false; + + // type-specifier + case (RuleID)Rule::type_specifier_0elaborated_type_specifier: + case (RuleID)Rule::type_specifier_0typename_specifier: + case (RuleID)Rule::type_specifier_0cv_qualifier: + return false; + case (RuleID)Rule::type_specifier_0simple_type_specifier: + N = N->children().front(); + continue; + + // simple-type-specifier + case (RuleID)Rule::simple_type_specifier_0type_name: + case (RuleID)Rule::simple_type_specifier_0template_name: + case (RuleID)Rule::simple_type_specifier_0builtin_type: + case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_2simple_template_id: + case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1template_name: + case (RuleID)Rule::simple_type_specifier_0nested_name_specifier_1type_name: + case (RuleID)Rule::simple_type_specifier_0decltype_specifier: + case (RuleID)Rule::simple_type_specifier_0placeholder_type_specifier: + return true; + case (RuleID)Rule::simple_type_specifier_0long: + case (RuleID)Rule::simple_type_specifier_0short: + case (RuleID)Rule::simple_type_specifier_0signed: + case (RuleID)Rule::simple_type_specifier_0unsigned: + return false; + + default: + LLVM_DEBUG(llvm::errs() << "Unhandled rule " << N->rule() << "\n"); + llvm_unreachable("hasExclusiveType be exhaustive!"); + } + } +} + llvm::DenseMap buildGuards() { +#define GUARD(cond) \ + { \ + [](const GuardParams &P) { return cond; } \ + } #define TOKEN_GUARD(kind, cond) \ [](const GuardParams& P) { \ const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens); \ @@ -177,6 +282,19 @@ {(RuleID)Rule::non_function_declarator_0declarator, SYMBOL_GUARD(declarator, !isFunctionDeclarator(&N))}, + // A {decl,type,defining-type}-specifier-sequence cannot have multiple + // "exclusive" types (like class names): a value has only one type. + {(RuleID)Rule:: + defining_type_specifier_seq_0defining_type_specifier_1defining_type_specifier_seq, + GUARD(!hasExclusiveType(P.RHS.front()) || + !hasExclusiveType(P.RHS.back()))}, + {(RuleID)Rule::type_specifier_seq_0type_specifier_1type_specifier_seq, + GUARD(!hasExclusiveType(P.RHS.front()) || + !hasExclusiveType(P.RHS.back()))}, + {(RuleID)Rule::decl_specifier_seq_0decl_specifier_1decl_specifier_seq, + GUARD(!hasExclusiveType(P.RHS.front()) || + !hasExclusiveType(P.RHS.back()))}, + {(RuleID)Rule::contextual_override_0identifier, TOKEN_GUARD(identifier, Tok.text() == "override")}, {(RuleID)Rule::contextual_final_0identifier, @@ -190,10 +308,12 @@ {(RuleID)Rule::contextual_zero_0numeric_constant, TOKEN_GUARD(numeric_constant, Tok.text() == "0")}, - {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement, - guardNextTokenNotElse}, - {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement, - guardNextTokenNotElse}, + {(RuleID)Rule:: + selection_statement_0if_1l_paren_2condition_3r_paren_4statement, + guardNextTokenNotElse}, + {(RuleID)Rule:: + selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement, + guardNextTokenNotElse}, // The grammar distinguishes (only) user-defined vs plain string literals, // where the clang lexer distinguishes (only) encoding types. diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -350,7 +350,7 @@ decl-specifier := CONSTINIT decl-specifier := INLINE decl-specifier-seq := decl-specifier -decl-specifier-seq := decl-specifier decl-specifier-seq +decl-specifier-seq := decl-specifier decl-specifier-seq [guard] storage-class-specifier := STATIC storage-class-specifier := THREAD_LOCAL storage-class-specifier := EXTERN @@ -364,31 +364,32 @@ type-specifier := typename-specifier type-specifier := cv-qualifier type-specifier-seq := type-specifier -type-specifier-seq := type-specifier type-specifier-seq +type-specifier-seq := type-specifier type-specifier-seq [guard] defining-type-specifier := type-specifier defining-type-specifier := class-specifier defining-type-specifier := enum-specifier defining-type-specifier-seq := defining-type-specifier -defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq +defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq [guard] simple-type-specifier := nested-name-specifier_opt type-name simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id simple-type-specifier := decltype-specifier simple-type-specifier := placeholder-type-specifier simple-type-specifier := nested-name-specifier_opt template-name -simple-type-specifier := CHAR -simple-type-specifier := CHAR8_T -simple-type-specifier := CHAR16_T -simple-type-specifier := CHAR32_T -simple-type-specifier := WCHAR_T -simple-type-specifier := BOOL +simple-type-specifier := builtin-type +builtin-type := CHAR +builtin-type := CHAR8_T +builtin-type := CHAR16_T +builtin-type := CHAR32_T +builtin-type := WCHAR_T +builtin-type := BOOL simple-type-specifier := SHORT -simple-type-specifier := INT +builtin-type := INT simple-type-specifier := LONG simple-type-specifier := SIGNED simple-type-specifier := UNSIGNED -simple-type-specifier := FLOAT -simple-type-specifier := DOUBLE -simple-type-specifier := VOID +builtin-type := FLOAT +builtin-type := DOUBLE +builtin-type := VOID type-name := class-name type-name := enum-name type-name := typedef-name diff --git a/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/test/cxx/decl-specfier-seq.cpp @@ -0,0 +1,27 @@ +// RUN: clang-pseudo -grammar=cxx -source=%s --print-forest | FileCheck %s + +// not parsed as Type{foo} Type{bar} +foo bar; +// CHECK-NOT: simple-declaration := decl-specifier-seq ; +// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ; +// CHECK: ├─decl-specifier-seq~simple-type-specifier +// CHECK: ├─init-declarator-list~IDENTIFIER +// CHECK: └─; +// CHECK-NOT: simple-declaration := decl-specifier-seq ; + +// not parsed as Type{std} Type{::string} Declarator{s}; +std::string s; +// CHECK-NOT: nested-name-specifier := :: +// CHECK: simple-declaration := decl-specifier-seq init-declarator-list ; +// CHECK: ├─decl-specifier-seq~simple-type-specifier := +// CHECK: │ ├─simple-type-specifier := nested-name-specifier type-name +// CHECK: │ │ ├─nested-name-specifier := #1 +// CHECK: │ │ │ ├─nested-name-specifier := type-name :: +// CHECK: │ │ │ └─nested-name-specifier := namespace-name :: +// CHECK: │ │ └─type-name +// CHECK: │ └─simple-type-specifier := nested-name-specifier template-name +// CHECK: │ ├─nested-name-specifier =#1 +// CHECK: │ └─template-name~IDENTIFIER +// CHECK: ├─init-declarator-list~IDENTIFIER +// CHECK: └─; +// CHECK-NOT: nested-name-specifier := :: diff --git a/clang-tools-extra/pseudo/test/fuzzer.cpp b/clang-tools-extra/pseudo/test/fuzzer.cpp --- a/clang-tools-extra/pseudo/test/fuzzer.cpp +++ b/clang-tools-extra/pseudo/test/fuzzer.cpp @@ -1,4 +1,4 @@ // RUN: clang-pseudo-fuzzer -grammar=%cxx-bnf-file -print %s | FileCheck %s int x; // CHECK: translation-unit := declaration-seq -// CHECK: simple-type-specifier := INT +// CHECK: builtin-type := INT