diff --git a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp --- a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp +++ b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp @@ -92,7 +92,7 @@ return It->second; }; for (const auto &Spec : Specs) { - assert(Spec.Sequence.size() < Rule::MaxElements); + assert(Spec.Sequence.size() <= Rule::MaxElements); Symbols.clear(); for (const RuleSpec::Element &Elt : Spec.Sequence) Symbols.push_back(Lookup(Elt.Symbol)); diff --git a/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Pseudo/cxx.bnf @@ -0,0 +1,740 @@ +# This is a C++ grammar from the C++ standard [1]. +# +# The grammar is a superset of the true grammar requiring semantic constraints +# to resolve ambiguities. The grammar is context-free and ambiguous (beyond the +# limit of LR(k)). We use a general parsing algorithm (e.g. GLR) to handle the +# grammar and generate a transition table which is used to drive the parsing. +# +# It aims to align with the ISO C++ grammar as much as possible. We adjust it +# to fit the need for the grammar-based parser: +# - attributes are omitted, which will be handled as comments; +# - we don't allow nullable non-terminal symbols. There are a few nullable +# non-terminals in the spec grammar; they are adjusted to be non-nullable; +# - the file merely describes the core C++ grammar. Preprocessor directives and +# lexical conversions are omitted as we reuse clang's lexer and run a fake +# preprocessor; +# +# Guidelines: +# - non-terminals are lower_case; terminals (aka tokens) correspond to +# clang::TokenKind, written as "IDENTIFIER", "USING", "::" etc; +# - optional symbols are supported, with a _opt suffix; +# +# [1] https://isocpp.org/files/papers/N4860.pdf +# +# +# _ serves as a "fake" start symbol, coming with real grammar symbols. 
+_ := translation-unit + +# gram.key +typedef-name := IDENTIFIER +typedef-name := simple-template-id +namespace-name := IDENTIFIER +namespace-name := namespace-alias +namespace-alias := IDENTIFIER +class-name := IDENTIFIER +class-name := simple-template-id +enum-name := IDENTIFIER +template-name := IDENTIFIER + +# gram.basic +#! Custom modifications to eliminate optional declaration-seq +translation-unit := declaration-seq +translation-unit := global-module-fragment_opt module-declaration declaration-seq_opt private-module-fragment_opt + +# gram.expr +# expr.prim +primary-expression := literal +primary-expression := THIS +primary-expression := ( expression ) +primary-expression := id-expression +primary-expression := lambda-expression +primary-expression := fold-expression +primary-expression := requires-expression +id-expression := unqualified-id +id-expression := qualified-id +unqualified-id := IDENTIFIER +unqualified-id := operator-function-id +unqualified-id := conversion-function-id +unqualified-id := literal-operator-id +unqualified-id := ~ type-name +unqualified-id := ~ decltype-specifier +unqualified-id := template-id +qualified-id := nested-name-specifier TEMPLATE_opt unqualified-id +nested-name-specifier := :: +nested-name-specifier := type-name :: +nested-name-specifier := namespace-name :: +nested-name-specifier := decltype-specifier :: +nested-name-specifier := nested-name-specifier IDENTIFIER :: +nested-name-specifier := nested-name-specifier TEMPLATE_opt simple-template-id :: +lambda-expression := lambda-introducer lambda-declarator_opt compound-statement +lambda-expression := lambda-introducer < template-parameter-list > requires-clause_opt lambda-declarator_opt compound-statement +lambda-introducer := [ lambda-capture_opt ] +lambda-declarator := ( parameter-declaration-clause_opt ) decl-specifier-seq_opt noexcept-specifier_opt trailing-return-type_opt requires-clause_opt +lambda-capture := capture-default +lambda-capture := capture-list +lambda-capture 
:= capture-default , capture-list +capture-default := & +capture-default := = +capture-list := capture +capture-list := capture-list , capture +capture := simple-capture +capture := init-capture +simple-capture := IDENTIFIER ..._opt +simple-capture := & IDENTIFIER ..._opt +simple-capture := THIS +simple-capture := * THIS +init-capture := ..._opt IDENTIFIER initializer +init-capture := & ..._opt IDENTIFIER initializer +fold-expression := ( cast-expression fold-operator ... ) +fold-expression := ( ... fold-operator cast-expression ) +fold-expression := ( cast-expression fold-operator ... fold-operator cast-expression ) +fold-operator := + +fold-operator := - +fold-operator := * +fold-operator := / +fold-operator := % +fold-operator := ^ +fold-operator := & +fold-operator := | +fold-operator := << +fold-operator := >> +fold-operator := += +fold-operator := -= +fold-operator := *= +fold-operator := /= +fold-operator := %= +fold-operator := ^= +fold-operator := &= +fold-operator := |= +fold-operator := <<= +fold-operator := >>= +fold-operator := = +fold-operator := == +fold-operator := != +fold-operator := < +fold-operator := > +fold-operator := <= +fold-operator := >= +fold-operator := && +fold-operator := || +fold-operator := , +fold-operator := .* +fold-operator := ->* +requires-expression := REQUIRES requirement-parameter-list_opt requirement-body +requirement-parameter-list := ( parameter-declaration-clause_opt ) +requirement-body := { requirement-seq } +requirement-seq := requirement +requirement-seq := requirement-seq requirement +requirement := simple-requirement +requirement := type-requirement +requirement := compound-requirement +requirement := nested-requirement +simple-requirement := expression ; +type-requirement := TYPENAME nested-name-specifier_opt type-name ; +compound-requirement := { expression } NOEXCEPT_opt return-type-requirement_opt ; +return-type-requirement := -> type-constraint +nested-requirement := REQUIRES constraint-expression ; +# expr.post 
+postfix-expression := primary-expression +postfix-expression := postfix-expression [ expr-or-braced-init-list ] +postfix-expression := postfix-expression ( expression-list_opt ) +postfix-expression := simple-type-specifier ( expression-list_opt ) +postfix-expression := typename-specifier ( expression-list_opt ) +postfix-expression := simple-type-specifier braced-init-list +postfix-expression := postfix-expression . TEMPLATE_opt id-expression +postfix-expression := postfix-expression -> TEMPLATE_opt id-expression +postfix-expression := postfix-expression ++ +postfix-expression := postfix-expression -- +postfix-expression := DYNAMIC_CAST < type-id > ( expression ) +postfix-expression := STATIC_CAST < type-id > ( expression ) +postfix-expression := REINTERPRET_CAST < type-id > ( expression ) +postfix-expression := CONST_CAST < type-id > ( expression ) +postfix-expression := TYPEID ( expression ) +postfix-expression := TYPEID ( type-id ) +expression-list := initializer-list +# expr.unary +unary-expression := postfix-expression +unary-expression := unary-operator cast-expression +unary-expression := ++ cast-expression +unary-expression := -- cast-expression +unary-expression := await-expression +unary-expression := SIZEOF unary-expression +unary-expression := SIZEOF ( type-id ) +unary-expression := SIZEOF ... ( IDENTIFIER ) +unary-expression := ALIGNOF ( type-id ) +unary-expression := noexcept-expression +unary-expression := new-expression +unary-expression := delete-expression +unary-operator := * +unary-operator := & +unary-operator := + +unary-operator := - +unary-operator := ! 
+unary-operator := ~ +await-expression := CO_AWAIT cast-expression +noexcept-expression := NOEXCEPT ( expression ) +new-expression := ::_opt NEW new-placement_opt new-type-id new-initializer_opt +new-expression := ::_opt NEW new-placement_opt ( type-id ) new-initializer_opt +new-placement := ( expression-list ) +new-type-id := type-specifier-seq new-declarator_opt +new-declarator := ptr-operator new-declarator_opt +new-declarator := noptr-new-declarator +noptr-new-declarator := [ expression_opt ] +noptr-new-declarator := noptr-new-declarator [ constant-expression ] +new-initializer := ( expression-list_opt ) +new-initializer := braced-init-list +delete-expression := ::_opt DELETE cast-expression +delete-expression := ::_opt DELETE [ ] cast-expression +cast-expression := unary-expression +cast-expression := ( type-id ) cast-expression +# expr.mptr.oper +pm-expression := cast-expression +pm-expression := pm-expression .* cast-expression +pm-expression := pm-expression ->* cast-expression +# expr.mul +multiplicative-expression := pm-expression +multiplicative-expression := multiplicative-expression * pm-expression +multiplicative-expression := multiplicative-expression / pm-expression +multiplicative-expression := multiplicative-expression % pm-expression +# expr.add +additive-expression := multiplicative-expression +additive-expression := additive-expression + multiplicative-expression +additive-expression := additive-expression - multiplicative-expression +# expr.shift +shift-expression := additive-expression +shift-expression := shift-expression << additive-expression +shift-expression := shift-expression >> additive-expression +# expr.spaceship +compare-expression := shift-expression +compare-expression := compare-expression <=> shift-expression +# expr.rel +relational-expression := compare-expression +relational-expression := relational-expression < compare-expression +relational-expression := relational-expression > compare-expression +relational-expression := 
relational-expression <= compare-expression +relational-expression := relational-expression >= compare-expression +# expr.eq +equality-expression := relational-expression +equality-expression := equality-expression == relational-expression +equality-expression := equality-expression != relational-expression +# expr.bit.and +and-expression := equality-expression +and-expression := and-expression & equality-expression +# expr.xor +exclusive-or-expression := and-expression +exclusive-or-expression := exclusive-or-expression ^ and-expression +# expr.or +inclusive-or-expression := exclusive-or-expression +inclusive-or-expression := inclusive-or-expression | exclusive-or-expression +# expr.log.and +logical-and-expression := inclusive-or-expression +logical-and-expression := logical-and-expression && inclusive-or-expression +# expr.log.or +logical-or-expression := logical-and-expression +logical-or-expression := logical-or-expression || logical-and-expression +# expr.cond +conditional-expression := logical-or-expression +conditional-expression := logical-or-expression ? 
expression : assignment-expression +# expr.ass +yield-expression := CO_YIELD assignment-expression +yield-expression := CO_YIELD braced-init-list +throw-expression := THROW assignment-expression_opt +assignment-expression := conditional-expression +assignment-expression := yield-expression +assignment-expression := throw-expression +assignment-expression := logical-or-expression assignment-operator initializer-clause +assignment-operator := = +assignment-operator := *= +assignment-operator := /= +assignment-operator := %= +assignment-operator := += +assignment-operator := -= +assignment-operator := >>= +assignment-operator := <<= +assignment-operator := &= +assignment-operator := ^= +assignment-operator := |= +# expr.comma +expression := assignment-expression +expression := expression , assignment-expression +# expr.const +constant-expression := conditional-expression + +# gram.stmt +statement := labeled-statement +statement := expression-statement +statement := compound-statement +statement := selection-statement +statement := iteration-statement +statement := jump-statement +statement := declaration-statement +statement := try-block +init-statement := expression-statement +init-statement := simple-declaration +condition := expression +condition := decl-specifier-seq declarator brace-or-equal-initializer +labeled-statement := IDENTIFIER : statement +labeled-statement := CASE constant-expression : statement +labeled-statement := DEFAULT : statement +expression-statement := expression_opt ; +compound-statement := { statement-seq_opt } +statement-seq := statement +statement-seq := statement-seq statement +selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement +selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement +selection-statement := SWITCH ( init-statement_opt condition ) statement +iteration-statement := WHILE ( condition ) statement +iteration-statement := DO statement WHILE ( expression ) 
; +iteration-statement := FOR ( init-statement condition_opt ; expression_opt ) statement +iteration-statement := FOR ( init-statement_opt for-range-declaration : for-range-initializer ) statement +for-range-declaration := decl-specifier-seq declarator +for-range-declaration := decl-specifier-seq ref-qualifier_opt [ identifier-list ] +for-range-initializer := expr-or-braced-init-list +jump-statement := BREAK ; +jump-statement := CONTINUE ; +jump-statement := RETURN expr-or-braced-init-list_opt ; +jump-statement := coroutine-return-statement +jump-statement := GOTO IDENTIFIER ; +coroutine-return-statement := CO_RETURN expr-or-braced-init-list_opt ; +declaration-statement := block-declaration + +# gram.dcl +declaration-seq := declaration +declaration-seq := declaration-seq declaration +declaration := block-declaration +declaration := nodeclspec-function-declaration +declaration := function-definition +declaration := template-declaration +declaration := deduction-guide +declaration := explicit-instantiation +declaration := explicit-specialization +declaration := export-declaration +declaration := linkage-specification +declaration := namespace-definition +declaration := empty-declaration +declaration := module-import-declaration +block-declaration := simple-declaration +block-declaration := asm-declaration +block-declaration := namespace-alias-definition +block-declaration := using-declaration +block-declaration := using-enum-declaration +block-declaration := using-directive +block-declaration := static_assert-declaration +block-declaration := alias-declaration +block-declaration := opaque-enum-declaration +nodeclspec-function-declaration := declarator ; +alias-declaration := USING IDENTIFIER = defining-type-id ; +simple-declaration := decl-specifier-seq init-declarator-list_opt ; +simple-declaration := decl-specifier-seq ref-qualifier_opt [ identifier-list ] initializer ; +static_assert-declaration := STATIC_ASSERT ( constant-expression ) ; +static_assert-declaration 
:= STATIC_ASSERT ( constant-expression , string-literal ) ; +empty-declaration := ; +# dcl.spec +decl-specifier := storage-class-specifier +decl-specifier := defining-type-specifier +decl-specifier := function-specifier +decl-specifier := FRIEND +decl-specifier := TYPEDEF +decl-specifier := CONSTEXPR +decl-specifier := CONSTEVAL +decl-specifier := CONSTINIT +decl-specifier := INLINE +decl-specifier-seq := decl-specifier +decl-specifier-seq := decl-specifier decl-specifier-seq +storage-class-specifier := STATIC +storage-class-specifier := THREAD_LOCAL +storage-class-specifier := EXTERN +storage-class-specifier := MUTABLE +function-specifier := VIRTUAL +function-specifier := explicit-specifier +explicit-specifier := EXPLICIT ( constant-expression ) +explicit-specifier := EXPLICIT +type-specifier := simple-type-specifier +type-specifier := elaborated-type-specifier +type-specifier := typename-specifier +type-specifier := cv-qualifier +type-specifier-seq := type-specifier +type-specifier-seq := type-specifier type-specifier-seq +defining-type-specifier := type-specifier +defining-type-specifier := class-specifier +defining-type-specifier := enum-specifier +defining-type-specifier-seq := defining-type-specifier +defining-type-specifier-seq := defining-type-specifier defining-type-specifier-seq +simple-type-specifier := nested-name-specifier_opt type-name +simple-type-specifier := nested-name-specifier TEMPLATE simple-template-id +simple-type-specifier := decltype-specifier +simple-type-specifier := placeholder-type-specifier +simple-type-specifier := nested-name-specifier_opt template-name +simple-type-specifier := CHAR +simple-type-specifier := CHAR8_T +simple-type-specifier := CHAR16_T +simple-type-specifier := CHAR32_T +simple-type-specifier := WCHAR_T +simple-type-specifier := BOOL +simple-type-specifier := SHORT +simple-type-specifier := INT +simple-type-specifier := LONG +simple-type-specifier := SIGNED +simple-type-specifier := UNSIGNED +simple-type-specifier := 
FLOAT +simple-type-specifier := DOUBLE +simple-type-specifier := VOID +type-name := class-name +type-name := enum-name +type-name := typedef-name +elaborated-type-specifier := class-key nested-name-specifier_opt IDENTIFIER +elaborated-type-specifier := class-key simple-template-id +elaborated-type-specifier := class-key nested-name-specifier TEMPLATE_opt simple-template-id +elaborated-type-specifier := elaborated-enum-specifier +elaborated-enum-specifier := ENUM nested-name-specifier_opt IDENTIFIER +decltype-specifier := DECLTYPE ( expression ) +placeholder-type-specifier := type-constraint_opt AUTO +placeholder-type-specifier := type-constraint_opt DECLTYPE ( AUTO ) +init-declarator-list := init-declarator +init-declarator-list := init-declarator-list , init-declarator +init-declarator := declarator initializer_opt +init-declarator := declarator requires-clause +declarator := ptr-declarator +declarator := noptr-declarator parameters-and-qualifiers trailing-return-type +ptr-declarator := noptr-declarator +ptr-declarator := ptr-operator ptr-declarator +noptr-declarator := declarator-id +noptr-declarator := noptr-declarator parameters-and-qualifiers +noptr-declarator := noptr-declarator [ constant-expression_opt ] +noptr-declarator := ( ptr-declarator ) +parameters-and-qualifiers := ( parameter-declaration-clause_opt ) cv-qualifier-seq_opt ref-qualifier_opt noexcept-specifier_opt +trailing-return-type := -> type-id +ptr-operator := * cv-qualifier-seq_opt +ptr-operator := & +ptr-operator := && +ptr-operator := nested-name-specifier * cv-qualifier-seq_opt +cv-qualifier-seq := cv-qualifier cv-qualifier-seq_opt +cv-qualifier := CONST +cv-qualifier := VOLATILE +ref-qualifier := & +ref-qualifier := && +declarator-id := ..._opt id-expression +type-id := type-specifier-seq abstract-declarator_opt +defining-type-id := defining-type-specifier-seq abstract-declarator_opt +abstract-declarator := ptr-abstract-declarator +abstract-declarator := noptr-abstract-declarator_opt 
parameters-and-qualifiers trailing-return-type +abstract-declarator := abstract-pack-declarator +ptr-abstract-declarator := noptr-abstract-declarator +ptr-abstract-declarator := ptr-operator ptr-abstract-declarator_opt +noptr-abstract-declarator := noptr-abstract-declarator_opt parameters-and-qualifiers +noptr-abstract-declarator := noptr-abstract-declarator_opt [ constant-expression ] +noptr-abstract-declarator := ( ptr-abstract-declarator ) +abstract-pack-declarator := noptr-abstract-pack-declarator +abstract-pack-declarator := ptr-operator abstract-pack-declarator +noptr-abstract-pack-declarator := noptr-abstract-pack-declarator parameters-and-qualifiers +noptr-abstract-pack-declarator := noptr-abstract-pack-declarator [ constant-expression_opt ] +noptr-abstract-pack-declarator := ... +#! Custom modifications to avoid nullable clause. +parameter-declaration-clause := parameter-declaration-list +parameter-declaration-clause := parameter-declaration-list_opt ... +parameter-declaration-clause := parameter-declaration-list , ... 
+parameter-declaration-list := parameter-declaration +parameter-declaration-list := parameter-declaration-list , parameter-declaration +parameter-declaration := decl-specifier-seq declarator +parameter-declaration := decl-specifier-seq declarator = initializer-clause +parameter-declaration := decl-specifier-seq abstract-declarator_opt +parameter-declaration := decl-specifier-seq abstract-declarator_opt = initializer-clause +# dcl.init +initializer := brace-or-equal-initializer +initializer := ( expression-list ) +brace-or-equal-initializer := = initializer-clause +brace-or-equal-initializer := braced-init-list +initializer-clause := assignment-expression +initializer-clause := braced-init-list +braced-init-list := { initializer-list ,_opt } +braced-init-list := { designated-initializer-list ,_opt } +braced-init-list := { } +initializer-list := initializer-clause ..._opt +initializer-list := initializer-list , initializer-clause ..._opt +designated-initializer-list := designated-initializer-clause +designated-initializer-list := designated-initializer-list , designated-initializer-clause +designated-initializer-clause := designator brace-or-equal-initializer +designator := . 
IDENTIFIER +expr-or-braced-init-list := expression +expr-or-braced-init-list := braced-init-list +# dcl.fct +function-definition := decl-specifier-seq_opt declarator virt-specifier-seq_opt function-body +function-definition := decl-specifier-seq_opt declarator requires-clause function-body +function-body := ctor-initializer_opt compound-statement +function-body := function-try-block +function-body := = DEFAULT ; +function-body := = DELETE ; +# dcl.enum +enum-specifier := enum-head { enumerator-list_opt } +enum-specifier := enum-head { enumerator-list , } +enum-head := enum-key enum-head-name_opt enum-base_opt +enum-head-name := nested-name-specifier_opt IDENTIFIER +opaque-enum-declaration := enum-key enum-head-name enum-base_opt ; +enum-key := ENUM +enum-key := ENUM CLASS +enum-key := ENUM STRUCT +enum-base := : type-specifier-seq +enumerator-list := enumerator-definition +enumerator-list := enumerator-list , enumerator-definition +enumerator-definition := enumerator +enumerator-definition := enumerator = constant-expression +enumerator := IDENTIFIER +using-enum-declaration := USING elaborated-enum-specifier ; +# basic.namespace +namespace-definition := named-namespace-definition +namespace-definition := unnamed-namespace-definition +namespace-definition := nested-namespace-definition +named-namespace-definition := INLINE_opt NAMESPACE IDENTIFIER { namespace-body_opt } +unnamed-namespace-definition := INLINE_opt NAMESPACE { namespace-body_opt } +nested-namespace-definition := NAMESPACE enclosing-namespace-specifier :: INLINE_opt IDENTIFIER { namespace-body } +enclosing-namespace-specifier := IDENTIFIER +enclosing-namespace-specifier := enclosing-namespace-specifier :: INLINE_opt IDENTIFIER +#! Custom modification to avoid nullable namespace-body. 
+namespace-body := declaration-seq +namespace-alias-definition := NAMESPACE IDENTIFIER = qualified-namespace-specifier ; +qualified-namespace-specifier := nested-name-specifier_opt namespace-name +using-directive := USING NAMESPACE nested-name-specifier_opt namespace-name ; +using-declaration := USING using-declarator-list ; +using-declarator-list := using-declarator ..._opt +using-declarator-list := using-declarator-list , using-declarator ..._opt +using-declarator := TYPENAME_opt nested-name-specifier unqualified-id +# dcl.asm +asm-declaration := ASM ( string-literal ) ; +# dcl.link +linkage-specification := EXTERN string-literal { declaration-seq_opt } +linkage-specification := EXTERN string-literal declaration + +# gram.module +module-declaration := export-keyword_opt module-keyword module-name module-partition_opt +module-name := module-name-qualifier_opt IDENTIFIER +module-partition := : module-name-qualifier_opt IDENTIFIER +module-name-qualifier := IDENTIFIER . +module-name-qualifier := module-name-qualifier IDENTIFIER . +export-declaration := EXPORT declaration +export-declaration := EXPORT ( declaration-seq_opt ) +export-declaration := export-keyword module-import-declaration +module-import-declaration := import-keyword module-name +module-import-declaration := import-keyword module-partition +# FIXME: we don't have header-name in the grammar. Handle these in PP? 
+# module-import-declaration := import-keyword header-name +global-module-fragment := module-keyword ; declaration-seq_opt +private-module-fragment := module-keyword : PRIVATE ; declaration-seq_opt + +# gram.class +class-specifier := class-head { member-specification_opt } +class-head := class-key class-head-name class-virt-specifier_opt base-clause_opt +class-head := class-key base-clause_opt +class-head-name := nested-name-specifier_opt class-name +class-virt-specifier := contextual-final +class-key := CLASS +class-key := STRUCT +class-key := UNION +member-specification := member-declaration member-specification_opt +member-specification := access-specifier : member-declaration member-specification_opt +member-declaration := decl-specifier-seq_opt member-declarator-list_opt ; +member-declaration := function-definition +member-declaration := using-declaration +member-declaration := using-enum-declaration +member-declaration := static_assert-declaration +member-declaration := template-declaration +member-declaration := explicit-specialization +member-declaration := deduction-guide +member-declaration := alias-declaration +member-declaration := opaque-enum-declaration +member-declaration := empty-declaration +member-declarator-list := member-declarator +member-declarator-list := member-declarator-list , member-declarator +member-declarator := declarator virt-specifier-seq_opt pure-specifier_opt +member-declarator := declarator requires-clause +member-declarator := declarator brace-or-equal-initializer +member-declarator := IDENTIFIER_opt : constant-expression brace-or-equal-initializer_opt +virt-specifier-seq := virt-specifier +virt-specifier-seq := virt-specifier-seq virt-specifier +virt-specifier := contextual-override +virt-specifier := contextual-final +pure-specifier := = contextual-zero +conversion-function-id := OPERATOR conversion-type-id +conversion-type-id := type-specifier-seq conversion-declarator_opt +conversion-declarator := ptr-operator 
conversion-declarator_opt +base-clause := : base-specifier-list +base-specifier-list := base-specifier ..._opt +base-specifier-list := base-specifier-list , base-specifier ..._opt +base-specifier := class-or-decltype +base-specifier := VIRTUAL access-specifier_opt class-or-decltype +base-specifier := access-specifier VIRTUAL_opt class-or-decltype +class-or-decltype := nested-name-specifier_opt type-name +class-or-decltype := nested-name-specifier TEMPLATE simple-template-id +class-or-decltype := decltype-specifier +access-specifier := PRIVATE +access-specifier := PROTECTED +access-specifier := PUBLIC +ctor-initializer := : mem-initializer-list +mem-initializer-list := mem-initializer ..._opt +mem-initializer-list := mem-initializer-list , mem-initializer ..._opt +mem-initializer := mem-initializer-id ( expression-list_opt ) +mem-initializer := mem-initializer-id braced-init-list +mem-initializer-id := class-or-decltype +mem-initializer-id := IDENTIFIER + +# gram.over +operator-function-id := OPERATOR operator-name +operator-name := NEW +operator-name := DELETE +operator-name := NEW [ ] +operator-name := DELETE [ ] +operator-name := CO_AWAIT +operator-name := ( ) +operator-name := [ ] +operator-name := -> +operator-name := ->* +operator-name := ~ +operator-name := ! 
+operator-name := + +operator-name := - +operator-name := * +operator-name := / +operator-name := % +operator-name := ^ +operator-name := & +operator-name := | +operator-name := = +operator-name := += +operator-name := -= +operator-name := *= +operator-name := /= +operator-name := %= +operator-name := ^= +operator-name := &= +operator-name := |= +operator-name := == +operator-name := != +operator-name := < +operator-name := > +operator-name := <= +operator-name := >= +operator-name := <=> +operator-name := && +operator-name := || +operator-name := << +operator-name := >> +operator-name := <<= +operator-name := >>= +operator-name := ++ +operator-name := -- +operator-name := , +literal-operator-id := OPERATOR string-literal IDENTIFIER +literal-operator-id := OPERATOR user-defined-string-literal + +# gram.temp +template-declaration := template-head declaration +template-declaration := template-head concept-definition +template-head := TEMPLATE < template-parameter-list > requires-clause_opt +template-parameter-list := template-parameter +template-parameter-list := template-parameter-list , template-parameter +requires-clause := REQUIRES constraint-logical-or-expression +constraint-logical-or-expression := constraint-logical-and-expression +constraint-logical-or-expression := constraint-logical-or-expression || constraint-logical-and-expression +constraint-logical-and-expression := primary-expression +constraint-logical-and-expression := constraint-logical-and-expression && primary-expression +template-parameter := type-parameter +template-parameter := parameter-declaration +type-parameter := type-parameter-key ..._opt IDENTIFIER_opt +type-parameter := type-parameter-key IDENTIFIER_opt = type-id +type-parameter := type-constraint ..._opt IDENTIFIER_opt +type-parameter := type-constraint IDENTIFIER_opt = type-id +type-parameter := template-head type-parameter-key ..._opt IDENTIFIER_opt +type-parameter := template-head type-parameter-key IDENTIFIER_opt = id-expression 
+type-parameter-key := CLASS +type-parameter-key := TYPENAME +type-constraint := nested-name-specifier_opt concept-name +type-constraint := nested-name-specifier_opt concept-name < template-argument-list_opt > +simple-template-id := template-name < template-argument-list_opt > +template-id := simple-template-id +template-id := operator-function-id < template-argument-list_opt > +template-id := literal-operator-id < template-argument-list_opt > +template-argument-list := template-argument ..._opt +template-argument-list := template-argument-list , template-argument ..._opt +template-argument := constant-expression +template-argument := type-id +template-argument := id-expression +constraint-expression := logical-or-expression +deduction-guide := explicit-specifier_opt template-name ( parameter-declaration-list_opt ) -> simple-template-id ; +concept-definition := CONCEPT concept-name = constraint-expression ; +concept-name := IDENTIFIER +typename-specifier := TYPENAME nested-name-specifier IDENTIFIER +typename-specifier := TYPENAME nested-name-specifier TEMPLATE_opt simple-template-id +explicit-instantiation := EXTERN_opt TEMPLATE declaration +explicit-specialization := TEMPLATE < > declaration + +# gram.except +try-block := TRY compound-statement handler-seq +function-try-block := TRY ctor-initializer_opt compound-statement handler-seq +handler-seq := handler handler-seq_opt +handler := CATCH ( exception-declaration ) compound-statement +exception-declaration := type-specifier-seq declarator +exception-declaration := type-specifier-seq abstract-declarator_opt +noexcept-specifier := NOEXCEPT ( constant-expression ) +noexcept-specifier := NOEXCEPT + +# gram.cpp +identifier-list := IDENTIFIER +identifier-list := identifier-list , IDENTIFIER + +# gram.lex +#! As we use clang lexer, most of lexical symbols are not needed, we only add +#! needed literals. 
+literal := integer-literal +literal := character-literal +literal := floating-point-literal +literal := string-literal +literal := boolean-literal +literal := pointer-literal +literal := user-defined-literal +integer-literal := NUMERIC_CONSTANT +character-literal := CHAR_CONSTANT +character-literal := WIDE_CHAR_CONSTANT +character-literal := UTF8_CHAR_CONSTANT +character-literal := UTF16_CHAR_CONSTANT +character-literal := UTF32_CHAR_CONSTANT +floating-point-literal := NUMERIC_CONSTANT +string-literal-chunk := STRING_LITERAL +string-literal-chunk := WIDE_STRING_LITERAL +string-literal-chunk := UTF8_STRING_LITERAL +string-literal-chunk := UTF16_STRING_LITERAL +string-literal-chunk := UTF32_STRING_LITERAL +#! Technically, string concatenation happens at phase 6 which is before parsing, +#! so it doesn't belong to the grammar. However, we extend the grammar to +#! support it, to make the pseudo parser fully functional on practical code. +string-literal := string-literal-chunk +string-literal := string-literal string-literal-chunk +user-defined-literal := user-defined-integer-literal +user-defined-literal := user-defined-floating-point-literal +user-defined-literal := user-defined-string-literal +user-defined-literal := user-defined-character-literal +user-defined-integer-literal := NUMERIC_CONSTANT +user-defined-string-literal-chunk := STRING_LITERAL +user-defined-string-literal-chunk := WIDE_STRING_LITERAL +user-defined-string-literal-chunk := UTF8_STRING_LITERAL +user-defined-string-literal-chunk := UTF16_STRING_LITERAL +user-defined-string-literal-chunk := UTF32_STRING_LITERAL +user-defined-string-literal := user-defined-string-literal-chunk +user-defined-string-literal := string-literal-chunk user-defined-string-literal +user-defined-string-literal := user-defined-string-literal string-literal-chunk +user-defined-floating-point-literal := NUMERIC_CONSTANT +user-defined-character-literal := CHAR_CONSTANT +user-defined-character-literal := WIDE_CHAR_CONSTANT 
+user-defined-character-literal := UTF8_CHAR_CONSTANT
+user-defined-character-literal := UTF16_CHAR_CONSTANT
+user-defined-character-literal := UTF32_CHAR_CONSTANT
+boolean-literal := FALSE
+boolean-literal := TRUE
+pointer-literal := NULLPTR
+
+#! Contextual keywords -- the clang lexer always lexes them as identifier tokens.
+#! They are placeholders for grammar literals that lex as other token kinds.
+contextual-override := IDENTIFIER
+contextual-final := IDENTIFIER
+contextual-zero := NUMERIC_CONSTANT
+module-keyword := IDENTIFIER
+import-keyword := IDENTIFIER
+export-keyword := IDENTIFIER
diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt
--- a/clang/test/CMakeLists.txt
+++ b/clang/test/CMakeLists.txt
@@ -76,6 +76,7 @@
   clang-repl
   clang-diff
   clang-scan-deps
+  clang-pseudo
   diagtool
   hmaptool
   )
diff --git a/clang/test/Syntax/check-cxx-bnf.test b/clang/test/Syntax/check-cxx-bnf.test
new file mode 100644
--- /dev/null
+++ b/clang/test/Syntax/check-cxx-bnf.test
@@ -0,0 +1,2 @@
+// Verify that clang/lib/Tooling/Syntax/Pseudo/cxx.bnf is accepted by clang-pseudo.
+// RUN: clang-pseudo -check-grammar=%cxx-bnf-file
diff --git a/clang/test/Syntax/lit.local.cfg b/clang/test/Syntax/lit.local.cfg
new file mode 100644
--- /dev/null
+++ b/clang/test/Syntax/lit.local.cfg
@@ -0,0 +1,4 @@
+cxx_bnf_file = os.path.join(config.clang_src_dir, 'lib', 'Tooling', 'Syntax',
+                            'Pseudo', 'cxx.bnf')
+config.substitutions.append(('%cxx-bnf-file',
+                             '%s' % (cxx_bnf_file)))
diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt
--- a/clang/tools/CMakeLists.txt
+++ b/clang/tools/CMakeLists.txt
@@ -14,6 +14,7 @@
 add_clang_subdirectory(clang-offload-wrapper)
 add_clang_subdirectory(clang-scan-deps)
 add_clang_subdirectory(clang-repl)
+add_clang_subdirectory(clang-pseudo)
 add_clang_subdirectory(c-index-test)
diff --git a/clang/tools/clang-pseudo/CMakeLists.txt b/clang/tools/clang-pseudo/CMakeLists.txt
new file mode 100644
--- /dev/null
+++ b/clang/tools/clang-pseudo/CMakeLists.txt
@@ -0,0 +1,15 @@
+set(LLVM_LINK_COMPONENTS support) + +add_clang_tool(clang-pseudo + ClangPseudo.cpp + ) + +set(CLANG_PSEUDO_LIB_DEPS + clangBasic + clangToolingSyntaxPseudo + ) + +clang_target_link_libraries(clang-pseudo + PRIVATE + ${CLANG_PSEUDO_LIB_DEPS} + ) diff --git a/clang/tools/clang-pseudo/ClangPseudo.cpp b/clang/tools/clang-pseudo/ClangPseudo.cpp new file mode 100644 --- /dev/null +++ b/clang/tools/clang-pseudo/ClangPseudo.cpp @@ -0,0 +1,47 @@ +//===-- ClangPseudo.cpp - Clang pseudo parser tool ------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/Tooling/Syntax/Pseudo/Grammar.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/MemoryBuffer.h" + +using clang::syntax::pseudo::Grammar; +using llvm::cl::desc; +using llvm::cl::init; +using llvm::cl::opt; + +static opt + CheckGrammar("check-grammar", desc("Parse and check a BNF grammar file."), + init("")); + +int main(int argc, char *argv[]) { + llvm::cl::ParseCommandLineOptions(argc, argv, ""); + + if (CheckGrammar.getNumOccurrences()) { + llvm::ErrorOr> Text = + llvm::MemoryBuffer::getFile(CheckGrammar); + if (std::error_code EC = Text.getError()) { + llvm::errs() << "Error: can't read grammar file '" << CheckGrammar + << "': " << EC.message() << "\n"; + return 1; + } + std::vector Diags; + auto RSpecs = Grammar::parseBNF(Text.get()->getBuffer(), Diags); + + if (!Diags.empty()) { + llvm::errs() << llvm::join(Diags, "\n"); + return 2; + } + llvm::errs() << llvm::formatv("grammar file {0} is parsed successfully\n", + CheckGrammar); + return 0; + } + return 0; +}