diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h @@ -19,6 +19,11 @@ class TokenStream; class LRTable; +struct GuardParams { + llvm::ArrayRef RHS; + const TokenStream &Tokens; + SymbolID Lookahead; +}; // A guard restricts when a grammar rule can be used. // // The GLR parser will use the guard to determine whether a rule reduction will @@ -26,8 +31,7 @@ // `virt-specifier := IDENTIFIER` only if the identifier's text is 'override`. // // Return true if the guard is satisfied. -using RuleGuard = llvm::function_ref RHS, const TokenStream &)>; +using RuleGuard = llvm::function_ref; // A recovery strategy determines a region of code to skip when parsing fails. // diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -421,7 +421,7 @@ if (!GuardID) return true; if (auto Guard = Lang.Guards.lookup(GuardID)) - return Guard(RHS, Params.Code); + return Guard({RHS, Params.Code, Lookahead}); LLVM_DEBUG(llvm::dbgs() << llvm::formatv("missing guard implementation for rule {0}\n", Lang.G.dumpRule(RID))); diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp --- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp +++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp @@ -11,6 +11,7 @@ #include "clang-pseudo/Language.h" #include "clang-pseudo/grammar/Grammar.h" #include "clang-pseudo/grammar/LRTable.h" +#include "clang/Basic/TokenKinds.h" #include namespace clang { @@ -21,29 +22,29 @@ #include "CXXBNF.inc" ; -bool guardOverride(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "override"; +bool guardOverride(const GuardParams &Params) { + assert(Params.RHS.size() == 1 && + Params.RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Params.Tokens.tokens()[Params.RHS.front()->startTokenIndex()].text() == + "override"; } -bool guardFinal(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "final"; +bool guardFinal(const GuardParams &Params) { + assert(Params.RHS.size() == 1 && + Params.RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); + return Params.Tokens.tokens()[Params.RHS.front()->startTokenIndex()].text() == + "final"; } -bool guardModule(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "module"; +bool guardModule(const GuardParams &Params) { + return Params.Tokens.tokens()[Params.RHS.front()->startTokenIndex()].text() == + "module"; } -bool guardImport(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "import"; +bool guardImport(const GuardParams &Params) { + return Params.Tokens.tokens()[Params.RHS.front()->startTokenIndex()].text() == + "import"; } -bool guardExport(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "export"; +bool guardExport(const GuardParams &Params) { + return Params.Tokens.tokens()[Params.RHS.front()->startTokenIndex()].text() == + "export"; } bool isFunctionDeclarator(const ForestNode *Declarator) { @@ -93,17 +94,19 @@ } llvm_unreachable("unreachable"); } -bool guardFunction(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator)); - return isFunctionDeclarator(RHS.front()); +bool guardFunction(const GuardParams &Params) { + assert(Params.RHS.size() == 1 && + Params.RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator)); + return isFunctionDeclarator(Params.RHS.front()); } -bool guardNonFunction(llvm::ArrayRef RHS, - const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator)); - return !isFunctionDeclarator(RHS.front()); +bool guardNonFunction(const GuardParams &Params) { + assert(Params.RHS.size() == 1 && + Params.RHS.front()->symbol() == (SymbolID)(cxx::Symbol::declarator)); + return !isFunctionDeclarator(Params.RHS.front()); +} + +bool guardNextTokenNotElse(const GuardParams &Params) { + return symbolToToken(Params.Lookahead) != tok::kw_else; } llvm::DenseMap buildGuards() { @@ -115,6 +118,7 @@ {(ExtensionID)Extension::Module, guardModule}, {(ExtensionID)Extension::FunctionDeclarator, guardFunction}, {(ExtensionID)Extension::NonFunctionDeclarator, guardNonFunction}, + {(ExtensionID)Extension::NextTokenNotElse, guardNextTokenNotElse}, }; } diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf --- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf +++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf @@ -290,7 +290,7 @@ compound-statement := { statement-seq_opt [recover=Brackets] } statement-seq := statement statement-seq := statement-seq statement -selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement +selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement [guard=NextTokenNotElse] selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement selection-statement := SWITCH ( init-statement_opt condition ) statement iteration-statement := WHILE ( condition ) statement diff --git a/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp new file mode 100644 --- /dev/null +++ b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp @@ -0,0 +1,27 @@ +// RUN: clang-pseudo -grammar=cxx -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s + +if (true) + if (true) { + + } + else { // should belong to the nested if statement + + } + +// CHECK: statement-seq~selection-statement := IF ( condition ) statement +// CHECK-NEXT: ├─IF +// CHECK-NEXT: ├─( +// CHECK-NEXT: ├─condition~TRUE +// CHECK-NEXT: ├─) +// CHECK-NEXT: └─statement~selection-statement +// CHECK-NEXT: ├─IF +// CHECK-NEXT: ├─( +// CHECK-NEXT: ├─condition~TRUE +// CHECK-NEXT: ├─) +// CHECK-NEXT: ├─statement~compound-statement := { } +// CHECK-NEXT: │ ├─{ +// CHECK-NEXT: │ └─} +// CHECK-NEXT: ├─ELSE +// CHECK-NEXT: └─statement~compound-statement := { } +// CHECK-NEXT: ├─{ +// CHECK-NEXT: └─} diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp --- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp +++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp @@ -634,11 +634,12 @@ start := IDENTIFIER [guard=TestOnly] )bnf"); TestLang.Guards.try_emplace( - extensionID("TestOnly"), - [&](llvm::ArrayRef RHS, const TokenStream &Tokens) { - assert(RHS.size() == 1 && - RHS.front()->symbol() == tokenSymbol(clang::tok::identifier)); - return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test"; + extensionID("TestOnly"), [&](const GuardParams &Params) { + assert(Params.RHS.size() == 1 && + Params.RHS.front()->symbol() == + tokenSymbol(clang::tok::identifier)); + return Params.Tokens.tokens()[Params.RHS.front()->startTokenIndex()] + .text() == "test"; }); clang::LangOptions LOptions; TestLang.Table = LRTable::buildSLR(TestLang.G);