diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
--- a/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
+++ b/clang-tools-extra/pseudo/include/clang-pseudo/Language.h
@@ -19,6 +19,17 @@
 class TokenStream;
 class LRTable;
 
+// Inputs passed to a RuleGuard when the parser considers reducing by a rule.
+struct GuardParams {
+  // Forest nodes matched by the symbols on the rule's right-hand side.
+  llvm::ArrayRef<const ForestNode *> RHS;
+  // Token stream being parsed; RHS nodes refer to tokens in it by index.
+  const TokenStream &Tokens;
+  // Symbol of the next token, i.e. the one following the candidate reduction.
+  // FIXME: use the index of Tokens.
+  SymbolID Lookahead;
+};
+
 // A guard restricts when a grammar rule can be used.
 //
 // The GLR parser will use the guard to determine whether a rule reduction will
@@ -26,8 +37,7 @@
 // `virt-specifier := IDENTIFIER` only if the identifier's text is 'override`.
 //
 // Return true if the guard is satisfied.
-using RuleGuard = llvm::function_ref<bool(
-    llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &)>;
+using RuleGuard = llvm::function_ref<bool(const GuardParams &)>;
 
 // A recovery strategy determines a region of code to skip when parsing fails.
 //
diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp
--- a/clang-tools-extra/pseudo/lib/GLR.cpp
+++ b/clang-tools-extra/pseudo/lib/GLR.cpp
@@ -421,7 +421,7 @@
     if (!R.Guarded)
       return true;
     if (auto Guard = Lang.Guards.lookup(RID))
-      return Guard(RHS, Params.Code);
+      return Guard({RHS, Params.Code, Lookahead});
     LLVM_DEBUG(llvm::dbgs()
                << llvm::formatv("missing guard implementation for rule {0}\n",
                                 Lang.G.dumpRule(RID)));
diff --git a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
--- a/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
+++ b/clang-tools-extra/pseudo/lib/cxx/CXX.cpp
@@ -156,15 +156,22 @@
   llvm_unreachable("unreachable");
 }
 
+// Guard for `if` statements without an else-branch: the reduction is allowed
+// only when the lookahead token is not `else`, so that a following `else`
+// binds to the innermost `if`.
+bool guardNextTokenNotElse(const GuardParams &P) {
+  return symbolToToken(P.Lookahead) != tok::kw_else;
+}
+
 llvm::DenseMap<RuleID, RuleGuard> buildGuards() {
 #define TOKEN_GUARD(kind, cond)                                                \
-  [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {      \
-    const Token &Tok = onlyToken(tok::kind, RHS, Tokens);                      \
+  [](const GuardParams &P) {                                                   \
+    const Token &Tok = onlyToken(tok::kind, P.RHS, P.Tokens);                  \
     return cond;                                                               \
   }
 #define SYMBOL_GUARD(kind, cond)                                               \
-  [](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {      \
-    const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, RHS, Tokens);     \
+  [](const GuardParams &P) {                                                   \
+    const ForestNode &N = onlySymbol((SymbolID)Symbol::kind, P.RHS, P.Tokens); \
     return cond;                                                               \
   }
   return {
@@ -186,6 +193,17 @@
       {(RuleID)Rule::contextual_zero_0numeric_constant,
        TOKEN_GUARD(numeric_constant, Tok.text() == "0")},
 
+      // Dangling else: the [guard] on the else-less `if` rule applies to every
+      // expansion of CONSTEXPR_opt / init-statement_opt, so register all four.
+      {(RuleID)Rule::selection_statement_0if_1l_paren_2condition_3r_paren_4statement,
+       guardNextTokenNotElse},
+      {(RuleID)Rule::selection_statement_0if_1l_paren_2init_statement_3condition_4r_paren_5statement,
+       guardNextTokenNotElse},
+      {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3condition_4r_paren_5statement,
+       guardNextTokenNotElse},
+      {(RuleID)Rule::selection_statement_0if_1constexpr_2l_paren_3init_statement_4condition_5r_paren_6statement,
+       guardNextTokenNotElse},
+
       // The grammar distinguishes (only) user-defined vs plain string literals,
       // where the clang lexer distinguishes (only) encoding types.
       {(RuleID)Rule::user_defined_string_literal_chunk_0string_literal,
diff --git a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
--- a/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
+++ b/clang-tools-extra/pseudo/lib/cxx/cxx.bnf
@@ -290,7 +290,7 @@
 compound-statement := { statement-seq_opt [recover=Brackets] }
 statement-seq := statement
 statement-seq := statement-seq statement
-selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement
+selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement [guard]
 selection-statement := IF CONSTEXPR_opt ( init-statement_opt condition ) statement ELSE statement
 selection-statement := SWITCH ( init-statement_opt condition ) statement
 iteration-statement := WHILE ( condition ) statement
diff --git a/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
new file mode 100644
--- /dev/null
+++ b/clang-tools-extra/pseudo/test/cxx/dangling-else.cpp
@@ -0,0 +1,22 @@
+// RUN: clang-pseudo -grammar=cxx -source=%s --start-symbol=statement-seq --print-forest | FileCheck %s
+
+// Verify that the else binds to the nested (innermost) if statement.
+if (true) if (true) {} else {}
+
+// CHECK:      statement-seq~selection-statement := IF ( condition ) statement
+// CHECK-NEXT: ├─IF
+// CHECK-NEXT: ├─(
+// CHECK-NEXT: ├─condition~TRUE
+// CHECK-NEXT: ├─)
+// CHECK-NEXT: └─statement~selection-statement
+// CHECK-NEXT:   ├─IF
+// CHECK-NEXT:   ├─(
+// CHECK-NEXT:   ├─condition~TRUE
+// CHECK-NEXT:   ├─)
+// CHECK-NEXT:   ├─statement~compound-statement := { }
+// CHECK-NEXT:   │ ├─{
+// CHECK-NEXT:   │ └─}
+// CHECK-NEXT:   ├─ELSE
+// CHECK-NEXT:   └─statement~compound-statement := { }
+// CHECK-NEXT:     ├─{
+// CHECK-NEXT:     └─}
diff --git a/clang-tools-extra/pseudo/unittests/GLRTest.cpp b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
--- a/clang-tools-extra/pseudo/unittests/GLRTest.cpp
+++ b/clang-tools-extra/pseudo/unittests/GLRTest.cpp
@@ -634,11 +634,12 @@
     start := IDENTIFIER [guard]
   )bnf");
   TestLang.Guards.try_emplace(
-      ruleFor("start"),
-      [&](llvm::ArrayRef<const ForestNode *> RHS, const TokenStream &Tokens) {
-        assert(RHS.size() == 1 &&
-               RHS.front()->symbol() == tokenSymbol(clang::tok::identifier));
-        return Tokens.tokens()[RHS.front()->startTokenIndex()].text() == "test";
+      ruleFor("start"), [&](const GuardParams &P) {
+        assert(P.RHS.size() == 1 &&
+               P.RHS.front()->symbol() ==
+                   tokenSymbol(clang::tok::identifier));
+        return P.Tokens.tokens()[P.RHS.front()->startTokenIndex()]
+                   .text() == "test";
       });
   clang::LangOptions LOptions;
   TestLang.Table = LRTable::buildSLR(TestLang.G);