diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h --- a/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h @@ -86,7 +86,7 @@ } // Error recovery strategies. // FIXME: these should be provided as extensions instead. -enum class RecoveryStrategy : uint8_t { None, Braces }; +enum class RecoveryStrategy : uint8_t { None, Braces, Eof }; // An extension is a piece of native code specific to a grammar that modifies // the behavior of annotated rules. One ExtensionID is assigned for each unique diff --git a/clang-tools-extra/pseudo/lib/GLR.cpp b/clang-tools-extra/pseudo/lib/GLR.cpp --- a/clang-tools-extra/pseudo/lib/GLR.cpp +++ b/clang-tools-extra/pseudo/lib/GLR.cpp @@ -30,13 +30,21 @@ findRecoveryEndpoint(RecoveryStrategy Strategy, const GSS::Node *RecoveryNode, const TokenStream &Tokens) { - assert(Strategy == RecoveryStrategy::Braces); - const ForestNode *LBrace = RecoveryNode->Payload; - assert(LBrace->kind() == ForestNode::Terminal && - LBrace->symbol() == tokenSymbol(tok::l_brace)); - if (const Token *RBrace = Tokens.tokens()[LBrace->startTokenIndex()].pair()) - return Tokens.index(*RBrace); - return llvm::None; + switch (Strategy) { + case RecoveryStrategy::Braces: { + const ForestNode *LBrace = RecoveryNode->Payload; + assert(LBrace->kind() == ForestNode::Terminal && + LBrace->symbol() == tokenSymbol(tok::l_brace)); + if (const Token *RBrace = Tokens.tokens()[LBrace->startTokenIndex()].pair()) + return Tokens.index(*RBrace); + return llvm::None; + } + case RecoveryStrategy::Eof: + return Tokens.tokens().size(); + case RecoveryStrategy::None: + break; + } + llvm_unreachable("Invalid strategy"); } } // namespace @@ -589,10 +597,7 @@ // so we can keep parsing. if (NextHeads.empty()) { glrRecover(Heads, I, Tokens, Params, NextHeads); - if (NextHeads.empty()) - // FIXME: Ensure the `_ := start-symbol` rules have a fallback - // error-recovery strategy attached. Then this condition can't happen. - return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0); + assert(!NextHeads.empty() && "No fallback recovery?!"); } else ++I; @@ -631,12 +636,9 @@ unsigned I = Terminals.size(); glrRecover(Heads, I, Tokens, Params, NextHeads); Reduce(NextHeads, tokenSymbol(tok::eof)); - if (auto *Result = SearchForAccept(NextHeads)) - return *Result; - - // We failed to parse the input, returning an opaque forest node for recovery. - // FIXME: as above, we can add fallback error handling so this is impossible. - return Params.Forest.createOpaque(StartSymbol, /*Token::Index=*/0); + auto *Result = SearchForAccept(NextHeads); + assert(Result && "No fallback recovery?"); + return *Result; } void glrReduce(std::vector &Heads, SymbolID Lookahead, diff --git a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp --- a/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp +++ b/clang-tools-extra/pseudo/lib/grammar/GrammarBNF.cpp @@ -106,9 +106,10 @@ assert(T->Rules.size() < (1 << RuleBits) && "Too many rules to fit in RuleID bits!"); - // Wherever RHS contains { foo }, mark foo for brace-recovery. - // FIXME: this should be grammar annotations instead. + // Infer recovery strategies. for (auto &Rule : T->Rules) { + // Wherever RHS contains { foo }, mark foo for brace-recovery. + // FIXME: this should be grammar annotations instead. for (unsigned I = 2; I < Rule.Size; ++I) if (Rule.Sequence[I] == tokenSymbol(tok::r_brace) && Rule.Sequence[I - 2] == tokenSymbol(tok::l_brace) && @@ -116,6 +117,12 @@ Rule.Recovery = RecoveryStrategy::Braces; Rule.RecoveryIndex = I - 1; } + // In `_ := symbol`, match symbol against the whole input on error. + // This ensures the overall parse never fails. + if (T->Nonterminals[Rule.Target].Name == "_") { + Rule.Recovery = RecoveryStrategy::Eof; + Rule.RecoveryIndex = 0; + } } const auto &SymbolOrder = getTopologicalOrder(T.get()); llvm::stable_sort(