Diff 438735

clang-tools-extra/pseudo/benchmarks/Benchmark.cpp

Show First 20 Lines • Show All 71 Lines • ▼ Show 20 Lines

static void parseBNF(benchmark::State &State) {		static void parseBNF(benchmark::State &State) {
std::vector<std::string> Diags;		std::vector<std::string> Diags;
for (auto _ : State)		for (auto _ : State)
Grammar::parseBNF(*GrammarText, Diags);		Grammar::parseBNF(*GrammarText, Diags);
}		}
BENCHMARK(parseBNF);		BENCHMARK(parseBNF);

static void buildSLR(benchmark::State &State) {		static void buildLR0(benchmark::State &State) {
for (auto _ : State)		for (auto _ : State)
LRTable::buildSLR(*G);		LRTable::buildLR0(*G);
}		}
BENCHMARK(buildSLR);		BENCHMARK(buildLR0);

TokenStream lexAndPreprocess() {		TokenStream lexAndPreprocess() {
clang::LangOptions LangOpts = genericLangOpts();		clang::LangOptions LangOpts = genericLangOpts();
TokenStream RawStream = pseudo::lex(*SourceText, LangOpts);		TokenStream RawStream = pseudo::lex(*SourceText, LangOpts);
auto DirectiveStructure = DirectiveTree::parse(RawStream);		auto DirectiveStructure = DirectiveTree::parse(RawStream);
chooseConditionalBranches(DirectiveStructure, RawStream);		chooseConditionalBranches(DirectiveStructure, RawStream);
TokenStream Cook =		TokenStream Cook =
cook(DirectiveStructure.stripDirectives(RawStream), LangOpts);		cook(DirectiveStructure.stripDirectives(RawStream), LangOpts);
Show All 31 Lines	stripComments(
cook(DirectiveStructure.stripDirectives(RawStream), LangOpts));		cook(DirectiveStructure.stripDirectives(RawStream), LangOpts));
}		}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *		State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
SourceText->size());		SourceText->size());
}		}
BENCHMARK(preprocess);		BENCHMARK(preprocess);

static void glrParse(benchmark::State &State) {		static void glrParse(benchmark::State &State) {
LRTable Table = clang::pseudo::LRTable::buildSLR(*G);		LRTable Table = clang::pseudo::LRTable::buildLR0(*G);
SymbolID StartSymbol = *G->findNonterminal("translation-unit");		SymbolID StartSymbol = *G->findNonterminal("translation-unit");
TokenStream Stream = lexAndPreprocess();		TokenStream Stream = lexAndPreprocess();
for (auto _ : State) {		for (auto _ : State) {
pseudo::ForestArena Forest;		pseudo::ForestArena Forest;
pseudo::GSS GSS;		pseudo::GSS GSS;
pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);		pseudo::glrParse(Stream, ParseParams{*G, Table, Forest, GSS}, StartSymbol);
}		}
State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *		State.SetBytesProcessed(static_cast<uint64_t>(State.iterations()) *
SourceText->size());		SourceText->size());
}		}
BENCHMARK(glrParse);		BENCHMARK(glrParse);

static void full(benchmark::State &State) {		static void full(benchmark::State &State) {
LRTable Table = clang::pseudo::LRTable::buildSLR(*G);		LRTable Table = clang::pseudo::LRTable::buildLR0(*G);
SymbolID StartSymbol = *G->findNonterminal("translation-unit");		SymbolID StartSymbol = *G->findNonterminal("translation-unit");
for (auto _ : State) {		for (auto _ : State) {
TokenStream Stream = lexAndPreprocess();		TokenStream Stream = lexAndPreprocess();
pseudo::ForestArena Forest;		pseudo::ForestArena Forest;
pseudo::GSS GSS;		pseudo::GSS GSS;
pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},		pseudo::glrParse(lexAndPreprocess(), ParseParams{*G, Table, Forest, GSS},
StartSymbol);		StartSymbol);
}		}
Show All 17 Lines

clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp

	Show All 19 Lines

	namespace clang {			namespace clang {
	namespace pseudo {			namespace pseudo {
	namespace {			namespace {

	class Fuzzer {			class Fuzzer {
	clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();			clang::LangOptions LangOpts = clang::pseudo::genericLangOpts();
	std::unique_ptr<Grammar> G;			std::unique_ptr<Grammar> G;
	LRTable T;			llvm::Optional<LRTable> T;
	bool Print;			bool Print;

	public:			public:
	Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {			Fuzzer(llvm::StringRef GrammarPath, bool Print) : Print(Print) {
	llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =			llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> GrammarText =
	llvm::MemoryBuffer::getFile(GrammarPath);			llvm::MemoryBuffer::getFile(GrammarPath);
	if (std::error_code EC = GrammarText.getError()) {			if (std::error_code EC = GrammarText.getError()) {
	llvm::errs() << "Error: can't read grammar file '" << GrammarPath			llvm::errs() << "Error: can't read grammar file '" << GrammarPath
	<< "': " << EC.message() << "\n";			<< "': " << EC.message() << "\n";
	std::exit(1);			std::exit(1);
	}			}
	std::vector<std::string> Diags;			std::vector<std::string> Diags;
	G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);			G = Grammar::parseBNF(GrammarText->get()->getBuffer(), Diags);
	if (!Diags.empty()) {			if (!Diags.empty()) {
	for (const auto &Diag : Diags)			for (const auto &Diag : Diags)
	llvm::errs() << Diag << "\n";			llvm::errs() << Diag << "\n";
	std::exit(1);			std::exit(1);
	}			}
	T = LRTable::buildSLR(*G);			T = LRTable::buildLR0(*G);
	}			}

	void operator()(llvm::StringRef Code) {			void operator()(llvm::StringRef Code) {
	std::string CodeStr = Code.str(); // Must be null-terminated.			std::string CodeStr = Code.str(); // Must be null-terminated.
	auto RawStream = lex(CodeStr, LangOpts);			auto RawStream = lex(CodeStr, LangOpts);
	auto DirectiveStructure = DirectiveTree::parse(RawStream);			auto DirectiveStructure = DirectiveTree::parse(RawStream);
	clang::pseudo::chooseConditionalBranches(DirectiveStructure, RawStream);			clang::pseudo::chooseConditionalBranches(DirectiveStructure, RawStream);
	// FIXME: strip preprocessor directives			// FIXME: strip preprocessor directives
	auto ParseableStream =			auto ParseableStream =
	clang::pseudo::stripComments(cook(RawStream, LangOpts));			clang::pseudo::stripComments(cook(RawStream, LangOpts));

	clang::pseudo::ForestArena Arena;			clang::pseudo::ForestArena Arena;
	clang::pseudo::GSS GSS;			clang::pseudo::GSS GSS;
	auto &Root =			auto &Root = glrParse(ParseableStream,
	glrParse(ParseableStream, clang::pseudo::ParseParams{*G, T, Arena, GSS},			clang::pseudo::ParseParams{G, T, Arena, GSS},
	*G->findNonterminal("translation-unit"));			*G->findNonterminal("translation-unit"));
	if (Print)			if (Print)
	llvm::outs() << Root.dumpRecursive(*G);			llvm::outs() << Root.dumpRecursive(*G);
	}			}
	};			};

	Fuzzer *Fuzz = nullptr;			Fuzzer *Fuzz = nullptr;

	} // namespace			} // namespace
	Show All 36 Lines

clang-tools-extra/pseudo/include/clang-pseudo/GLR.h

	Show First 20 Lines • Show All 126 Lines • ▼ Show 20 Lines
	// and returns a forest node of the start symbol.			// and returns a forest node of the start symbol.
	//			//
	// A rule `_ := StartSymbol` must exist for the chosen start symbol.			// A rule `_ := StartSymbol` must exist for the chosen start symbol.
	//			//
	// If the parsing fails, we model it as an opaque node in the forest.			// If the parsing fails, we model it as an opaque node in the forest.
	const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params,			const ForestNode &glrParse(const TokenStream &Code, const ParseParams &Params,
	SymbolID StartSymbol);			SymbolID StartSymbol);

	// An active stack head can have multiple available actions (reduce/reduce
	// actions, reduce/shift actions).
	// A step is any one action applied to any one stack head.
	struct ParseStep {
	// A specific stack head.
	const GSS::Node *Head = nullptr;
	// An action associated with the head.
	LRTable::Action Action = LRTable::Action::sentinel();
	};
	// A callback is invoked whenever a new GSS head is created during the GLR
	// parsing process (glrShift, or glrReduce).
	using NewHeadCallback = std::function<void(const GSS::Node *)>;
	// Apply all PendingShift actions on a given GSS state, newly-created heads are			// Apply all PendingShift actions on a given GSS state, newly-created heads are
	// passed to the callback.			// passed to the callback.
	//			//
	// When this function returns, PendingShift is empty.			// When this function returns, PendingShift is empty.
	//			//
	// Exposed for testing only.			// Exposed for testing only.
	void glrShift(std::vector<ParseStep> &PendingShift, const ForestNode &NextTok,			void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
	const ParseParams &Params, NewHeadCallback NewHeadCB);			const ForestNode &NextTok, const ParseParams &Params,
				std::vector<const GSS::Node *> &NewHeads);
	// Applies PendingReduce actions, until no more reduce actions are available.			// Applies PendingReduce actions, until no more reduce actions are available.
	//			//
	// When this function returns, PendingReduce is empty. Calls to NewHeadCB may			// When this function returns, PendingReduce is empty. Calls to NewHeadCB may
	// add elements to PendingReduce			// add elements to PendingReduce
	//			//
	// Exposed for testing only.			// Exposed for testing only.
	void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,			void glrReduce(std::vector<const GSS::Node *> &Heads,
	NewHeadCallback NewHeadCB);			const ParseParams &Params);

	} // namespace pseudo			} // namespace pseudo
	} // namespace clang			} // namespace clang

	#endif // CLANG_PSEUDO_GLR_H			#endif // CLANG_PSEUDO_GLR_H

clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h

Show First 20 Lines • Show All 168 Lines • ▼ Show 20 Lines	public:
const GrammarTable &table() const { return *T; }		const GrammarTable &table() const { return *T; }

private:		private:
std::unique_ptr<GrammarTable> T;		std::unique_ptr<GrammarTable> T;
// The symbol ID of '_'. (In the LR literature, this is the start symbol of		// The symbol ID of '_'. (In the LR literature, this is the start symbol of
// the augmented grammar.)		// the augmented grammar.)
SymbolID Underscore;		SymbolID Underscore;
};		};
// For each nonterminal X, computes the set of terminals that begin strings
// derived from X. (Known as FIRST sets in grammar-based parsers).
std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &);
// For each nonterminal X, computes the set of terminals that could immediately
// follow X. (Known as FOLLOW sets in grammar-based parsers).
std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &);

// Storage for the underlying data of the Grammar.		// Storage for the underlying data of the Grammar.
// It can be constructed dynamically (from compiling BNF file) or statically		// It can be constructed dynamically (from compiling BNF file) or statically
// (a compiled data-source).		// (a compiled data-source).
struct GrammarTable {		struct GrammarTable {
GrammarTable();		GrammarTable();

struct Nonterminal {		struct Nonterminal {
Show All 34 Lines

clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h

	Show All 32 Lines
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#ifndef CLANG_PSEUDO_GRAMMAR_LRTABLE_H			#ifndef CLANG_PSEUDO_GRAMMAR_LRTABLE_H
	#define CLANG_PSEUDO_GRAMMAR_LRTABLE_H			#define CLANG_PSEUDO_GRAMMAR_LRTABLE_H

	#include "clang-pseudo/grammar/Grammar.h"			#include "clang-pseudo/grammar/Grammar.h"
	#include "llvm/ADT/ArrayRef.h"			#include "llvm/ADT/ArrayRef.h"
				#include "llvm/ADT/DenseMap.h"
				#include "llvm/ADT/STLExtras.h"
				#include "llvm/Support/Capacity.h"
				#include "llvm/Support/raw_ostream.h"
	#include <cstdint>			#include <cstdint>
	#include <vector>			#include <vector>

	namespace clang {			namespace clang {
	namespace pseudo {			namespace pseudo {
				struct StateSymbol {
				using StateID = uint16_t; // XXX
				StateID State;
				SymbolID Symbol;
				};
				} // namespace pseudo
				} // namespace clang
				namespace llvm {
				template <> struct llvm::DenseMapInfo<clang::pseudo::StateSymbol> {
				using StateSymbol = clang::pseudo::StateSymbol;
				static inline StateSymbol getEmptyKey() {
				return StateSymbol{StateSymbol::StateID(-1), 0};
				}
				static inline StateSymbol getTombstoneKey() {
				return StateSymbol{StateSymbol::StateID(-1), 1};
				}
				static unsigned getHashValue(const StateSymbol &Val) {
				return (Val.State * 2754435761U) ^ Val.Symbol; // Knuth hash
				}
				static bool isEqual(const StateSymbol &LHS, const StateSymbol &RHS) {
				return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol;
				}
				};
				} // namespace llvm
				namespace clang {
				namespace pseudo {

	// Represents the LR parsing table, which can efficiently answer the question "what is			// Represents the LR parsing table, which can efficiently answer the question "what is
	// the next step given the lookahead token and current state on top of the			// the next step given the lookahead token and current state on top of the
	// stack?".			// stack?".
	//			//
	// This is a dense implementation, which only takes an amount of space that is			// This is a dense implementation, which only takes an amount of space that is
	// proportional to the number of non-empty entries in the table.			// proportional to the number of non-empty entries in the table.
	//			//
	// Unlike the typical LR parsing table which allows at most one available action			// Unlike the typical LR parsing table which allows at most one available action
	// per entry, conflicted actions are allowed in LRTable. The LRTable is designed			// per entry, conflicted actions are allowed in LRTable. The LRTable is designed
	// to be used in nondeterministic LR parsers (e.g. GLR).			// to be used in nondeterministic LR parsers (e.g. GLR).
	class LRTable {			class LRTable {
	public:			public:
	// StateID is only 13 bits wide.			// StateID is only 13 bits wide.
	using StateID = uint16_t;			using StateID = uint16_t;
	static constexpr unsigned StateBits = 13;			static constexpr unsigned StateBits = 13;

	// Action represents the terminal and nonterminal actions, it combines the			// Return the list of reductions applicable from a given state.
	// entry of the ACTION and GOTO tables from the LR literature.			// These correspond to items with the dot at the end.
	class Action {			llvm::ArrayRef<RuleID> getReduceRules(StateID State) const {
	public:			return Reduce.find(State);
	enum Kind : uint8_t {			}
	Sentinel = 0,			// Return the state to transition to after shifting a token Terminal.
	// Terminal actions, corresponding to entries of ACTION table.			// Returns None if this terminal is not legal here.
				llvm::Optional<StateID> getShiftState(StateID From, SymbolID Terminal) const {
	// Shift to state n: move forward with the lookahead, and push state n			auto It = Shift.find(StateSymbol{From, Terminal});
	// onto the state stack.			if (It == Shift.end())
	// A shift is a forward transition, and the value n is the next state that			return llvm::None;
	// the parser is to enter.			return It->second;
	Shift,			}
	// Reduce by a rule: pop the state stack.			// Return the state to transition to after reducing a symbol Nonterminal.
	Reduce,			// REQUIRES: this nonterminal is legal here.
				StateID getGoToState(StateID From, SymbolID Nonterminal) const {
	// NOTE: there are no typical accept actions in the LRtable, accept			auto It = Goto.find(StateSymbol{From, Nonterminal});
	// actions are handled specifically in the parser -- if the parser			assert(It != Goto.end());
	// reaches to a target state (which is goto(StartState, StartSymbol)) at			return It->second;
	// the EOF token after a reduce, this indicates the input has been parsed
	// as the StartSymbol successfully.

	// Nonterminal actions, corresponding to entry of GOTO table.

	// Go to state n: push state n onto the state stack.
	// Similar to Shift, but it is a nonterminal forward transition.
	GoTo,
	};

	static Action goTo(StateID S) { return Action(GoTo, S); }
	static Action shift(StateID S) { return Action(Shift, S); }
	static Action reduce(RuleID RID) { return Action(Reduce, RID); }
	static Action sentinel() { return Action(Sentinel, 0); }

	StateID getShiftState() const {
	assert(kind() == Shift);
	return Value;
	}
	StateID getGoToState() const {
	assert(kind() == GoTo);
	return Value;
	}
	RuleID getReduceRule() const {
	assert(kind() == Reduce);
	return Value;
	}			}
	Kind kind() const { return static_cast<Kind>(K); }

	bool operator==(const Action &L) const { return opaque() == L.opaque(); }
	uint16_t opaque() const { return K << ValueBits \| Value; };

	private:
	Action(Kind K1, unsigned Value) : K(K1), Value(Value) {}
	static constexpr unsigned ValueBits = StateBits;
	static constexpr unsigned KindBits = 3;
	static_assert(ValueBits >= RuleBits, "Value must be able to store RuleID");
	static_assert(KindBits + ValueBits <= 16,
	"Must be able to store kind and value efficiently");
	uint16_t K : KindBits;
	// Either StateID or RuleID, depending on the Kind.
	uint16_t Value : ValueBits;
	};

	// Returns all available actions for the given state on a terminal.
	// Expected to be called by LR parsers.
	llvm::ArrayRef<Action> getActions(StateID State, SymbolID Terminal) const;
	// Returns the state after we reduce a nonterminal.
	// Expected to be called by LR parsers.
	StateID getGoToState(StateID State, SymbolID Nonterminal) const;

	// Looks up available actions.
	// Returns empty if no available actions in the table.
	llvm::ArrayRef<Action> find(StateID State, SymbolID Symbol) const;

	// Returns the state from which the LR parser should start to parse the input			// Returns the state from which the LR parser should start to parse the input
	// tokens as the given StartSymbol.			// tokens as the given StartSymbol.
	//			//
	// In LR parsing, the start state of `translation-unit` corresponds to			// In LR parsing, the start state of `translation-unit` corresponds to
	// `_ := • translation-unit`.			// `_ := • translation-unit`.
	//			//
	// Each start state corresponds to a single grammar rule like `_ := start`.			// Each start state corresponds to a single grammar rule like `_ := start`.
	// REQUIRE: The given StartSymbol must exist in the grammar (in a form of			// REQUIRE: The given StartSymbol must exist in the grammar (in a form of
	// `_ := start`).			// `_ := start`).
	StateID getStartState(SymbolID StartSymbol) const;			StateID getStartState(SymbolID StartSymbol) const;

	size_t bytes() const {			size_t bytes() const {
	return sizeof(this) + Actions.capacity() sizeof(Action) +			return sizeof(*this) + llvm::capacity_in_bytes(Shift) +
	Symbols.capacity() * sizeof(SymbolID) +			llvm::capacity_in_bytes(Goto) + Reduce.bytes();
	StateOffset.capacity() * sizeof(uint32_t);
	}			}

	std::string dumpStatistics() const;			std::string dumpStatistics() const;
	std::string dumpForTests(const Grammar &G) const;			std::string dumpForTests(const Grammar &G) const;

	// Build a SLR(1) parsing table.			static LRTable buildLR0(const Grammar &G);
	static LRTable buildSLR(const Grammar &G);			struct Builder {
				unsigned StateCount = 0;
				std::vector<std::pair<StateID, RuleID>> Reduce;
				llvm::DenseMap<StateSymbol, StateID> Shift;
				llvm::DenseMap<StateSymbol, StateID> GoTo;
				std::vector<std::pair<SymbolID, StateID>> StartStates;

	class Builder;			LRTable build() && { return LRTable(std::move(*this)); }
	// Represents an entry in the table, used for building the LRTable.
	struct Entry {
	StateID State;
	SymbolID Symbol;
	Action Act;
	};			};
	// Build a specific table for testing purposes.
	static LRTable buildForTests(const GrammarTable &, llvm::ArrayRef<Entry>);

	private:			private:
	// Conceptually the LR table is a multimap from (State, SymbolID) => Action.			// A multimap from K => V, where Ks form a dense range [0, n).
	// Our physical representation is quite different for compactness.			// A flat array stores the values:
				// Values = [ values for k=0 | values for k=1 | values for k=2 | ... ]
				// And another stores the index for each K:
				// Keys = [ index for k=0, index for k=1, ... , Values.size()]
				// Lookup[k] is Values[ Keys[k]...Keys[k+1] ]
				template <typename K, typename V> struct OffsetTable {
				std::vector<uint32_t> Keys;
				std::vector<V> Values;

				OffsetTable(std::vector<std::pair<K, V>> &&Pairs, K Limit) {
				assert(llvm::all_of(Pairs, [&](auto &P) { return P.first < Limit; }));
				llvm::stable_sort(Pairs, llvm::less_first{});
				Keys.reserve(Limit + 1);
				Values.reserve(Pairs.size());
				unsigned I = 0;
				for (K Key = 0; Key < Limit; ++Key) {
				Keys.push_back(Values.size());
				while (I < Pairs.size() && Pairs[I].first == Key)
				Values.push_back(Pairs[I++].second);
				}
				Keys.push_back(Values.size());
				assert(Values.size() == Pairs.size());
				assert(Keys.size() == Limit + 1);
				}

				size_t bytes() const {
				return sizeof(*this) + llvm::capacity_in_bytes(Keys) +
				llvm::capacity_in_bytes(Values);
				}
				size_t size() const { return Values.size(); }
				size_t keys() const { return Keys.size() - 1; }

	// Index is StateID, value is the offset into Symbols/Actions			llvm::ArrayRef<V> find(K Key) const {
	// where the entries for this state begin.			return llvm::makeArrayRef(&Values[Keys[Key]], &Values[Keys[Key + 1]]);
	// Give a state id, the corresponding half-open range of Symbols/Actions is			}
	// [StateOffset[id], StateOffset[id+1]).			};
	std::vector<uint32_t> StateOffset;
	// Parallel to Actions, the value is SymbolID (columns of the matrix).			llvm::DenseMap<StateSymbol, StateID> Shift;
	// Grouped by the StateID, and only subranges are sorted.			OffsetTable<StateID, RuleID> Reduce;
	std::vector<SymbolID> Symbols;			llvm::DenseMap<StateSymbol, StateID> Goto;
	// A flat list of available actions, sorted by (State, SymbolID).
	std::vector<Action> Actions;
	// A sorted table, storing the start state for each target parsing symbol.			// A sorted table, storing the start state for each target parsing symbol.
	std::vector<std::pair<SymbolID, StateID>> StartStates;			std::vector<std::pair<SymbolID, StateID>> StartStates;

				LRTable(Builder B);
	};			};
	llvm::raw_ostream &operator<<(llvm::raw_ostream &, const LRTable::Action &);

	} // namespace pseudo			} // namespace pseudo
	} // namespace clang			} // namespace clang

	#endif // CLANG_PSEUDO_GRAMMAR_LRTABLE_H			#endif // CLANG_PSEUDO_GRAMMAR_LRTABLE_H

clang-tools-extra/pseudo/lib/GLR.cpp

Show All 40 Lines	const ForestNode &glrParse(const TokenStream &Tokens, const ParseParams &Params,
SymbolID StartSymbol) {		SymbolID StartSymbol) {
assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");		assert(isNonterminal(StartSymbol) && "Start symbol must be a nonterminal");
llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);		llvm::ArrayRef<ForestNode> Terminals = Params.Forest.createTerminals(Tokens);
auto &G = Params.G;		auto &G = Params.G;
(void)G;		(void)G;
auto &GSS = Params.GSStack;		auto &GSS = Params.GSStack;

// Lists of active shift, reduce actions.		// Lists of active shift, reduce actions.
std::vector<ParseStep> PendingShift, PendingReduce;
auto AddSteps = [&](const GSS::Node *Head, SymbolID NextTok) {
for (const auto &Action : Params.Table.getActions(Head->State, NextTok)) {
switch (Action.kind()) {
case LRTable::Action::Shift:
PendingShift.push_back({Head, Action});
break;
case LRTable::Action::Reduce:
PendingReduce.push_back({Head, Action});
break;
default:
llvm_unreachable("unexpected action kind!");
}
}
};
StateID StartState = Params.Table.getStartState(StartSymbol);		StateID StartState = Params.Table.getStartState(StartSymbol);
std::vector<const GSS::Node *> NewHeads = {		std::vector<const GSS::Node *> Heads = {
GSS.addNode(/State=/StartState,		GSS.addNode(/State=/StartState,
/ForestNode=/nullptr, {})};		/ForestNode=/nullptr, {})};
		std::vector<const GSS::Node *> NextHeads;
auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {		auto MaybeGC = [&, Roots(std::vector<const GSS::Node *>{}), I(0u)]() mutable {
assert(PendingShift.empty() && PendingReduce.empty() &&		assert(NextHeads.empty() && "Running GC at the wrong time!");
"Running GC at the wrong time!");

if (++I != 20) // Run periodically to balance CPU and memory usage.		if (++I != 20) // Run periodically to balance CPU and memory usage.
return;		return;
I = 0;		I = 0;

// We need to copy the list: Roots is consumed by the GC.		// We need to copy the list: Roots is consumed by the GC.
Roots = NewHeads;		Roots = Heads;
GSS.gc(std::move(Roots));		GSS.gc(std::move(Roots));
};		};
for (const ForestNode &Terminal : Terminals) {		for (const ForestNode &Terminal : Terminals) {
LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n",		LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next token {0} (id={1})\n",
G.symbolName(Terminal.symbol()),		G.symbolName(Terminal.symbol()),
Terminal.symbol()));		Terminal.symbol()));
for (const auto *Head : NewHeads)		glrShift(Heads, Terminal, Params, NextHeads);
AddSteps(Head, Terminal.symbol());		std::swap(Heads, NextHeads);
NewHeads.clear();		glrReduce(Heads, Params);
glrReduce(PendingReduce, Params,		NextHeads.clear();
[&](const GSS::Node * NewHead) {
// A reduce will enable more steps.
AddSteps(NewHead, Terminal.symbol());
});

glrShift(PendingShift, Terminal, Params,
[&](const GSS::Node *NewHead) { NewHeads.push_back(NewHead); });
MaybeGC();		MaybeGC();
}		}
LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Next is eof\n"));		LLVM_DEBUG(llvm::dbgs() << llvm::formatv("Reached eof\n"));
for (const auto *Heads : NewHeads)
AddSteps(Heads, tokenSymbol(tok::eof));

StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);		StateID AcceptState = Params.Table.getGoToState(StartState, StartSymbol);
// Collect new heads created from the final reduce.		// Collect new heads created from the final reduce.
std::vector<const GSS::Node*> Heads;
glrReduce(PendingReduce, Params, [&](const GSS::Node *NewHead) {
Heads.push_back(NewHead);
// A reduce will enable more steps.
AddSteps(NewHead, tokenSymbol(tok::eof));
});

const ForestNode *Result = nullptr;		const ForestNode *Result = nullptr;
for (const auto *Head : Heads) {		for (const auto *Head : Heads) {
if (Head->State == AcceptState) {		if (Head->State == AcceptState) {
assert(Head->Payload->symbol() == StartSymbol);		assert(Head->Payload->symbol() == StartSymbol);
assert(Result == nullptr && "multiple results!");		assert(Result == nullptr && "multiple results!");
Result = Head->Payload;		Result = Head->Payload;
}		}
}		}
Show All 15 Lines
// shifting a token, we shift only once by combining these heads.		// shifting a token, we shift only once by combining these heads.
//		//
// E.g. we have two heads (2, 3) in the GSS, and will shift both to reach 4:		// E.g. we have two heads (2, 3) in the GSS, and will shift both to reach 4:
// 0---1---2		// 0---1---2
// └---3		// └---3
// After the shift action, the GSS is:		// After the shift action, the GSS is:
// 0---1---2---4		// 0---1---2---4
// └---3---┘		// └---3---┘
void glrShift(std::vector<ParseStep> &PendingShift, const ForestNode &NewTok,		void glrShift(llvm::ArrayRef<const GSS::Node *> OldHeads,
const ParseParams &Params, NewHeadCallback NewHeadCB) {		const ForestNode &NewTok, const ParseParams &Params,
		std::vector<const GSS::Node *> &NewHeads) {
assert(NewTok.kind() == ForestNode::Terminal);		assert(NewTok.kind() == ForestNode::Terminal);
assert(llvm::all_of(PendingShift,
[](const ParseStep &Step) {
return Step.Action.kind() == LRTable::Action::Shift;
}) &&
"Pending shift actions must be shift actions");
LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n",		LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" Shift {0} ({1} active heads):\n",
Params.G.symbolName(NewTok.symbol()),		Params.G.symbolName(NewTok.symbol()),
PendingShift.size()));		OldHeads.size()));

// We group pending shifts by their target state so we can merge them.		// We group pending shifts by their target state so we can merge them.
llvm::stable_sort(PendingShift, [](const ParseStep &L, const ParseStep &R) {		llvm::SmallVector<std::pair<StateID, const GSS::Node *>, 8> Shifts;
return L.Action.getShiftState() < R.Action.getShiftState();		for (const auto* H : OldHeads)
});		if (auto S = Params.Table.getShiftState(H->State, NewTok.symbol()))
auto Rest = llvm::makeArrayRef(PendingShift);		Shifts.push_back({*S, H});
		llvm::stable_sort(Shifts, llvm::less_first{});

		auto Rest = llvm::makeArrayRef(Shifts);
llvm::SmallVector<const GSS::Node *> Parents;		llvm::SmallVector<const GSS::Node *> Parents;
while (!Rest.empty()) {		while (!Rest.empty()) {
// Collect the batch of PendingShift that have compatible shift states.		// Collect the batch of PendingShift that have compatible shift states.
// Their heads become TempParents, the parents of the new GSS node.		// Their heads become TempParents, the parents of the new GSS node.
StateID NextState = Rest.front().Action.getShiftState();		StateID NextState = Rest.front().first;

Parents.clear();		Parents.clear();
for (const auto &Base : Rest) {		for (const auto &Base : Rest) {
if (Base.Action.getShiftState() != NextState)		if (Base.first != NextState)
break;		break;
Parents.push_back(Base.Head);		Parents.push_back(Base.second);
}		}
Rest = Rest.drop_front(Parents.size());		Rest = Rest.drop_front(Parents.size());

LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" --> S{0} ({1} heads)\n",		LLVM_DEBUG(llvm::dbgs() << llvm::formatv(" --> S{0} ({1} heads)\n",
NextState, Parents.size()));		NextState, Parents.size()));
NewHeadCB(Params.GSStack.addNode(NextState, &NewTok, Parents));		NewHeads.push_back(Params.GSStack.addNode(NextState, &NewTok, Parents));
}		}
PendingShift.clear();
}		}

namespace {		namespace {
// A KeyedQueue yields pairs of keys and values in order of the keys.		// A KeyedQueue yields pairs of keys and values in order of the keys.
template <typename Key, typename Value>		template <typename Key, typename Value>
using KeyedQueue =		using KeyedQueue =
std::priority_queue<std::pair<Key, Value>,		std::priority_queue<std::pair<Key, Value>,
std::vector<std::pair<Key, Value>>, llvm::less_first>;		std::vector<std::pair<Key, Value>>, llvm::less_first>;
▲ Show 20 Lines • Show All 41 Lines • ▼ Show 20 Lines
// └--2(cv-qualifier)--┘ // goto(2, type-name)		// └--2(cv-qualifier)--┘ // goto(2, type-name)
//		//
// Before (joining due to same goto state, the same base):		// Before (joining due to same goto state, the same base):
// 0--1(class-name)--3(STAR)		// 0--1(class-name)--3(STAR)
// └--2(enum-name)--4(STAR)		// └--2(enum-name)--4(STAR)
// After reducing 3 by `pointer := class-name STAR` and		// After reducing 3 by `pointer := class-name STAR` and
// 2 by`enum-name := class-name STAR`:		// 2 by`enum-name := class-name STAR`:
// 0--5(pointer) // 5 is goto(0, pointer)		// 0--5(pointer) // 5 is goto(0, pointer)
void glrReduce(std::vector<ParseStep> &PendingReduce, const ParseParams &Params,		void glrReduce(std::vector<const GSS::Node *> &Heads,
NewHeadCallback NewHeadCB) {		const ParseParams &Params) {
// There are two interacting complications:		// There are two interacting complications:
// 1. Performing one reduce can unlock new reduces on the newly-created head.		// 1. Performing one reduce can unlock new reduces on the newly-created head.
// 2a. The ambiguous ForestNodes must be complete (have all sequence nodes).		// 2a. The ambiguous ForestNodes must be complete (have all sequence nodes).
// This means we must have unlocked all the reduces that contribute to it.		// This means we must have unlocked all the reduces that contribute to it.
// 2b. Similarly, the new GSS nodes must be complete (have all parents).		// 2b. Similarly, the new GSS nodes must be complete (have all parents).
//		//
// We define a "family" of reduces as those that produce the same symbol and		// We define a "family" of reduces as those that produce the same symbol and
// cover the same range of tokens. These are exactly the set of reductions		// cover the same range of tokens. These are exactly the set of reductions
▲ Show 20 Lines • Show All 42 Lines • ▼ Show 20 Lines	void glrReduce(std::vector<const GSS::Node *> &Heads,
struct PushSpec {		struct PushSpec {
// A base node is the head after popping the GSS nodes we are reducing.		// A base node is the head after popping the GSS nodes we are reducing.
const GSS::Node* Base = nullptr;		const GSS::Node* Base = nullptr;
Sequence Seq;		Sequence Seq;
};		};
KeyedQueue<Family, PushSpec> Sequences;		KeyedQueue<Family, PushSpec> Sequences;

Sequence TempSequence;		Sequence TempSequence;

		// We treat Heads as a queue of Pop operations still to be performed.
		// PoppedHeads is our position within it.
		unsigned PoppedHeads = 0;
		// In general the sequencing is complicated: each pop can yield multiple
		// pending pushes that might run in a different order than we found them.
		// However in trivial cases (only pop that yields only one push) we can
		// bypass all these fancy queues and pop+push directly. This is very common.
		auto PopAndPushTrivial = [&]() -> bool {
		if (!Sequences.empty() \|\| Heads.size() != PoppedHeads + 1)
		return false;
		const GSS::Node *Head = Heads.back();
		auto Rules = Params.Table.getReduceRules(Head->State);
		if (Rules.size() != 1)
		return false;
		const auto &Rule = Params.G.lookupRule(Rules.front());
		const GSS::Node *Base = Head;
		TempSequence.resize_for_overwrite(Rule.Size);
		for (unsigned I = 0; I < Rule.Size; ++I) {
		if (Base->parents().size() != 1)
		return false;
		TempSequence[Rule.Size - 1 - I] = Base->Payload;
		Base = Base->parents().front();
		}
		const ForestNode *Parsed =
		&Params.Forest.createSequence(Rule.Target, Rules.front(), TempSequence);
		StateID NextState = Params.Table.getGoToState(Base->State, Rule.Target);
		Heads.push_back(Params.GSStack.addNode(NextState, Parsed, {Base}));
		return true;
		};
// Pop walks up the parent chain(s) for a reduction from Head by Rule.		// Pop walks up the parent chain(s) for a reduction from Head by Rule.
// Once we reach the end, record the bases and sequences.		// Once we reach the end, record the bases and sequences.
auto Pop = [&](const GSS::Node *Head, RuleID RID) {		auto Pop = [&](const GSS::Node *Head, RuleID RID) {
LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.G.dumpRule(RID) << "\n");		LLVM_DEBUG(llvm::dbgs() << " Pop " << Params.G.dumpRule(RID) << "\n");
const auto &Rule = Params.G.lookupRule(RID);		const auto &Rule = Params.G.lookupRule(RID);
Family F{/Start=/0, /Symbol=/Rule.Target, /Rule=/RID};		Family F{/Start=/0, /Symbol=/Rule.Target, /Rule=/RID};
TempSequence.resize_for_overwrite(Rule.Size);		TempSequence.resize_for_overwrite(Rule.Size);
auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {		auto DFS = [&](const GSS::Node *N, unsigned I, auto &DFS) {
if (I == Rule.Size) {		if (I == Rule.Size) {
F.Start = TempSequence.front()->startTokenIndex();		F.Start = TempSequence.front()->startTokenIndex();
LLVM_DEBUG(llvm::dbgs() << " --> base at S" << N->State << "\n");		LLVM_DEBUG(llvm::dbgs() << " --> base at S" << N->State << "\n");
Sequences.emplace(F, PushSpec{N, TempSequence});		Sequences.emplace(F, PushSpec{N, TempSequence});
return;		return;
}		}
TempSequence[Rule.Size - 1 - I] = N->Payload;		TempSequence[Rule.Size - 1 - I] = N->Payload;
for (const GSS::Node *Parent : N->parents())		for (const GSS::Node *Parent : N->parents())
DFS(Parent, I + 1, DFS);		DFS(Parent, I + 1, DFS);
};		};
DFS(Head, 0, DFS);		DFS(Head, 0, DFS);
};		};
auto PopPending = [&] {		auto PopPending = [&] {
for (const ParseStep &Pending : PendingReduce)		for (; PoppedHeads < Heads.size(); ++PoppedHeads) {
Pop(Pending.Head, Pending.Action.getReduceRule());		if (PopAndPushTrivial())
PendingReduce.clear();		continue;
		for (RuleID R : Params.Table.getReduceRules(Heads[PoppedHeads]->State))
		Pop(Heads[PoppedHeads], R);
		}
};		};

std::vector<std::pair</Goto/ StateID, const GSS::Node *>> FamilyBases;		std::vector<std::pair</Goto/ StateID, const GSS::Node *>> FamilyBases;
std::vector<std::pair<RuleID, Sequence>> FamilySequences;		std::vector<std::pair<RuleID, Sequence>> FamilySequences;

std::vector<const GSS::Node *> TempGSSNodes;		std::vector<const GSS::Node *> TempGSSNodes;
std::vector<const ForestNode *> TempForestNodes;		std::vector<const ForestNode *> TempForestNodes;

▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Lines	while (!BasesLeft.empty()) {
Parents.clear();		Parents.clear();
for (const auto &Base : BasesLeft) {		for (const auto &Base : BasesLeft) {
if (Base.first != NextState)		if (Base.first != NextState)
break;		break;
Parents.push_back(Base.second);		Parents.push_back(Base.second);
}		}
BasesLeft = BasesLeft.drop_front(Parents.size());		BasesLeft = BasesLeft.drop_front(Parents.size());

// Invoking the callback for new heads, a real GLR parser may add new		Heads.push_back(Params.GSStack.addNode(NextState, Parsed, Parents));
// reduces to the PendingReduce queue!
NewHeadCB(Params.GSStack.addNode(NextState, Parsed, Parents));
}		}
PopPending();		PopPending();
}		}
assert(Sequences.empty());		assert(Sequences.empty());
}		}

const GSS::Node GSS::addNode(LRTable::StateID State, const ForestNode Symbol,		const GSS::Node GSS::addNode(LRTable::StateID State, const ForestNode Symbol,
llvm::ArrayRef<const Node *> Parents) {		llvm::ArrayRef<const Node *> Parents) {
▲ Show 20 Lines • Show All 67 Lines • Show Last 20 Lines

clang-tools-extra/pseudo/lib/cxx/CXX.cpp

	Show All 19 Lines
	// Returns the grammar parsed from CXXBNF.
	// The Grammar is parsed once, the first time this function runs, and the
	// released pointer is kept in a function-local static (it is not freed
	// here). The assert checks that parsing produced no diagnostics.
	const Grammar &getGrammar() {			const Grammar &getGrammar() {
	static std::vector<std::string> Diags;			static std::vector<std::string> Diags;
	static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release();			static Grammar *G = Grammar::parseBNF(CXXBNF, Diags).release();
	assert(Diags.empty());			assert(Diags.empty());
	return *G;			return *G;
	}			}

	// Returns the LR table built from getGrammar().
	// The table is heap-allocated once into a function-local static and is not
	// deleted here. The old side of this diff builds it with LRTable::buildSLR,
	// the new side with LRTable::buildLR0.
	const LRTable &getLRTable() {			const LRTable &getLRTable() {
	static LRTable *Table = new LRTable(LRTable::buildSLR(getGrammar()));			static LRTable *Table = new LRTable(LRTable::buildLR0(getGrammar()));
	return *Table;			return *Table;
	}			}

	} // namespace cxx			} // namespace cxx
	} // namespace pseudo			} // namespace pseudo
	} // namespace clang			} // namespace clang

clang-tools-extra/pseudo/lib/grammar/Grammar.cpp

Show First 20 Lines • Show All 81 Lines • ▼ Show 20 Lines	std::string Grammar::dump() const {
for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID)		for (SymbolID SID = 0; SID < T->Nonterminals.size(); ++SID)
OS << llvm::formatv(" {0} {1}\n", SID, symbolName(SID));		OS << llvm::formatv(" {0} {1}\n", SID, symbolName(SID));
OS << "Rules:\n";		OS << "Rules:\n";
for (RuleID RID = 0; RID < T->Rules.size(); ++RID)		for (RuleID RID = 0; RID < T->Rules.size(); ++RID)
OS << llvm::formatv(" {0} {1}\n", RID, dumpRule(RID));		OS << llvm::formatv(" {0} {1}\n", RID, dumpRule(RID));
return OS.str();		return OS.str();
}		}

// Computes the FIRST set of every nonterminal in G.
// The result has one entry per nonterminal (sized from
// G.table().Nonterminals) and is indexed by the nonterminal's SymbolID.
// Only tokens are ever inserted, either directly or by copying from another
// FIRST set.
std::vector<llvm::DenseSet<SymbolID>> firstSets(const Grammar &G) {
std::vector<llvm::DenseSet<SymbolID>> FirstSets(
G.table().Nonterminals.size());
// Adds FIRST(First) to FIRST(Target): the token itself if First is a token,
// otherwise everything currently in FirstSets[First].
// Returns true if FIRST(Target) gained at least one element.
auto ExpandFirstSet = [&FirstSets](SymbolID Target, SymbolID First) {
assert(isNonterminal(Target));
if (isToken(First))
return FirstSets[Target].insert(First).second;
bool Changed = false;
for (SymbolID SID : FirstSets[First])
Changed \|= FirstSets[Target].insert(SID).second;
return Changed;
};

// A rule S := T ... implies elements in FIRST(S):
// - if T is a terminal, FIRST(S) contains T
// - if T is a nonterminal, FIRST(S) contains FIRST(T)
// Since FIRST(T) may not have been fully computed yet, FIRST(S) itself may
// end up being incomplete.
// We iterate until we hit a fixed point.
// (This isn't particularly efficient, but table building isn't on the
// critical path).
bool Changed = true;
while (Changed) {
Changed = false;
for (const auto &R : G.table().Rules)
// We only need to consider the first element because symbols are
// non-nullable.
Changed \|= ExpandFirstSet(R.Target, R.seq().front());
}
return FirstSets;
}

// Computes the FOLLOW set of every nonterminal in G.
// The result has one entry per nonterminal (sized from
// G.table().Nonterminals) and is indexed by the nonterminal's SymbolID.
// It is derived from firstSets(G) plus the three rules described below,
// iterated until no set changes.
std::vector<llvm::DenseSet<SymbolID>> followSets(const Grammar &G) {
auto FirstSets = firstSets(G);
std::vector<llvm::DenseSet<SymbolID>> FollowSets(
G.table().Nonterminals.size());
// Expand the follow set of a nonterminal symbol Y by adding all symbols from
// the given symbol set. Returns true if FOLLOW(Y) gained at least one symbol.
auto ExpandFollowSet = [&FollowSets](SymbolID Y,
const llvm::DenseSet<SymbolID> &ToAdd) {
assert(isNonterminal(Y));
bool Changed = false;
for (SymbolID F : ToAdd)
Changed \|= FollowSets[Y].insert(F).second;
return Changed;
};
// Follow sets are computed using the following 3 rules. The computation is
// complete once a fixed point is reached, i.e. when no new symbol can be
// added to any of the follow sets.
//
// Rule 1: add endmarker to the FOLLOW(S), where S is the start symbol of the
// augmented grammar, in our case it is '_'.
FollowSets[G.underscore()].insert(tokenSymbol(tok::eof));
bool Changed = true;
while (Changed) {
Changed = false;
for (const auto &R : G.table().Rules) {
// Rule 2: for a rule X := ... Y Z, we add all symbols from FIRST(Z) to
// FOLLOW(Y).
for (size_t I = 0; I + 1 < R.seq().size(); ++I) {
if (isToken(R.seq()[I]))
continue;
// We only need to consider the next symbol because symbols are
// non-nullable.
SymbolID Next = R.seq()[I + 1];
if (isToken(Next))
// First set for a terminal is itself.
Changed \|= ExpandFollowSet(R.seq()[I], {Next});
else
Changed \|= ExpandFollowSet(R.seq()[I], FirstSets[Next]);
}
// Rule 3: for a rule X := ... Z, we add all symbols from FOLLOW(X) to
// FOLLOW(Z).
SymbolID Z = R.seq().back();
if (isNonterminal(Z))
Changed \|= ExpandFollowSet(Z, FollowSets[R.Target]);
}
}
return FollowSets;
}

// Returns the names of all terminals as an array of NumTerminals strings,
// indexed by token kind. The array is built once (function-local static) by
// expanding the PUNCTUATOR/KEYWORD/TOK macros over clang's TokenKinds.def:
// punctuators get their spelling, keywords and other tokens get the
// upper-cased identifier.
static llvm::ArrayRef<std::string> getTerminalNames() {		static llvm::ArrayRef<std::string> getTerminalNames() {
static const auto &TerminalNames = []() {		static const auto &TerminalNames = []() {
// Allocated with new[] and not freed here; the returned ArrayRef only views
// this storage.
auto TerminalNames = new std::string[NumTerminals];		auto TerminalNames = new std::string[NumTerminals];
#define PUNCTUATOR(Tok, Spelling) TerminalNames[tok::Tok] = Spelling;		#define PUNCTUATOR(Tok, Spelling) TerminalNames[tok::Tok] = Spelling;
#define KEYWORD(Keyword, Condition) \		#define KEYWORD(Keyword, Condition) \
TerminalNames[tok::kw_##Keyword] = llvm::StringRef(#Keyword).upper();		TerminalNames[tok::kw_##Keyword] = llvm::StringRef(#Keyword).upper();
#define TOK(Tok) TerminalNames[tok::Tok] = llvm::StringRef(#Tok).upper();		#define TOK(Tok) TerminalNames[tok::Tok] = llvm::StringRef(#Tok).upper();
#include "clang/Basic/TokenKinds.def"		#include "clang/Basic/TokenKinds.def"
return llvm::makeArrayRef(TerminalNames, NumTerminals);		return llvm::makeArrayRef(TerminalNames, NumTerminals);
}();		}();
return TerminalNames;		return TerminalNames;
}		}
// Initializes Terminals from the table returned by getTerminalNames().
GrammarTable::GrammarTable() : Terminals(getTerminalNames()) {}		GrammarTable::GrammarTable() : Terminals(getTerminalNames()) {}

} // namespace pseudo		} // namespace pseudo
} // namespace clang		} // namespace clang

clang-tools-extra/pseudo/lib/grammar/LRTable.cpp

	//===--- LRTable.cpp - Parsing table for LR parsers --------------- C++--===//			//===--- LRTable.cpp - Parsing table for LR parsers --------------- C++--===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "clang-pseudo/grammar/LRTable.h"			#include "clang-pseudo/grammar/LRTable.h"
	#include "clang-pseudo/grammar/Grammar.h"			#include "clang-pseudo/grammar/Grammar.h"
	#include "llvm/ADT/ArrayRef.h"			#include "llvm/ADT/ArrayRef.h"
	#include "llvm/ADT/STLExtras.h"			#include "llvm/ADT/STLExtras.h"
	#include "llvm/Support/ErrorHandling.h"
	#include "llvm/Support/FormatVariadic.h"			#include "llvm/Support/FormatVariadic.h"
	#include "llvm/Support/raw_ostream.h"			#include "llvm/Support/raw_ostream.h"

	namespace clang {			namespace clang {
	namespace pseudo {			namespace pseudo {

	// Prints a human-readable description of action A to OS and returns OS.
	// Shift, Reduce and GoTo actions print their target state or rule id.
	// A Sentinel action (or any other kind) is treated as unreachable.
	llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const LRTable::Action &A) {
	switch (A.kind()) {
	case LRTable::Action::Shift:
	return OS << llvm::formatv("shift state {0}", A.getShiftState());
	case LRTable::Action::Reduce:
	return OS << llvm::formatv("reduce by rule {0}", A.getReduceRule());
	case LRTable::Action::GoTo:
	return OS << llvm::formatv("go to state {0}", A.getGoToState());
	case LRTable::Action::Sentinel:
	llvm_unreachable("unexpected Sentinel action kind!");
	}
	llvm_unreachable("unexpected action kind!");
	}

	// Returns a short multi-line text summary of the table.
	// Old side of this diff: number of states (StateOffset.size() - 1), number
	// of actions, and bytes(). New side: the sizes of Shift, Reduce and Goto,
	// and bytes(). The body is a raw string literal, so its lines are emitted
	// verbatim.
	std::string LRTable::dumpStatistics() const {			std::string LRTable::dumpStatistics() const {
	return llvm::formatv(R"(			return llvm::formatv(R"(
	Statistics of the LR parsing table:			Statistics of the LR parsing table:
	number of states: {0}			number of actions: shift={0} reduce={1} goto={2}
	number of actions: {1}			size of the table (bytes): {3}
	size of the table (bytes): {2}
	)",			)",
	StateOffset.size() - 1, Actions.size(), bytes())			Shift.size(), Reduce.size(), Goto.size(), bytes())
	.str();			.str();
	}			}

	// Dumps the table as text: a "LRTable:" header followed by one "State N"
	// section per state, using G to render symbol and rule names.
	// Old side of this diff: for each terminal, prints every Shift/Reduce action
	// returned by find(S, TokID); then, for each nonterminal that has an entry,
	// prints its goto state.
	// New side: prints the state's reduce rules (getReduceRules) first, without
	// a lookahead token; then the shift state per terminal (getShiftState); then
	// the goto entries found in Goto.
	std::string LRTable::dumpForTests(const Grammar &G) const {			std::string LRTable::dumpForTests(const Grammar &G) const {
	std::string Result;			std::string Result;
	llvm::raw_string_ostream OS(Result);			llvm::raw_string_ostream OS(Result);
	OS << "LRTable:\n";			OS << "LRTable:\n";
	for (StateID S = 0; S < StateOffset.size() - 1; ++S) {			for (StateID S = 0; S < Reduce.keys(); ++S) {
	OS << llvm::formatv("State {0}\n", S);			OS << llvm::formatv("State {0}\n", S);
				for (RuleID R : getReduceRules(S))
				OS.indent(4) << llvm::formatv("reduce by rule {0} '{1}'\n", R,
				G.dumpRule(R));
	for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) {			for (uint16_t Terminal = 0; Terminal < NumTerminals; ++Terminal) {
	SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));			SymbolID TokID = tokenSymbol(static_cast<tok::TokenKind>(Terminal));
	for (auto A : find(S, TokID)) {			if (auto Next = getShiftState(S, TokID))
	if (A.kind() == LRTable::Action::Shift)
	OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",			OS.indent(4) << llvm::formatv("'{0}': shift state {1}\n",
	G.symbolName(TokID), A.getShiftState());			G.symbolName(TokID), *Next);
	else if (A.kind() == LRTable::Action::Reduce)
	OS.indent(4) << llvm::formatv("'{0}': reduce by rule {1} '{2}'\n",
	G.symbolName(TokID), A.getReduceRule(),
	G.dumpRule(A.getReduceRule()));
	}
	}			}
	for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();			for (SymbolID NontermID = 0; NontermID < G.table().Nonterminals.size();
	++NontermID) {			++NontermID) {
	if (find(S, NontermID).empty())			auto It = Goto.find(StateSymbol{S, NontermID});
	continue;			if (It != Goto.end())
	OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",			OS.indent(4) << llvm::formatv("'{0}': go to state {1}\n",
	G.symbolName(NontermID),			G.symbolName(NontermID), It->second);
	getGoToState(S, NontermID));
	}			}
	}			}
	return OS.str();			return OS.str();
	}			}

	// Returns all actions recorded for (State, Terminal); the result is empty
	// when find() locates no entry. Terminal must be a token symbol (asserted).
	llvm::ArrayRef<LRTable::Action> LRTable::getActions(StateID State,
	SymbolID Terminal) const {
	assert(pseudo::isToken(Terminal) && "expect terminal symbol!");
	return find(State, Terminal);
	}

	// Returns the destination state of the GoTo action for (State, Nonterminal).
	// Asserts that Nonterminal is a nonterminal and that exactly one action, of
	// kind GoTo, exists for the pair. With asserts disabled, a missing entry is
	// not detected before Result.front() is read.
	LRTable::StateID LRTable::getGoToState(StateID State,
	SymbolID Nonterminal) const {
	assert(pseudo::isNonterminal(Nonterminal) && "expected nonterminal symbol!");
	auto Result = find(State, Nonterminal);
	assert(Result.size() == 1 && Result.front().kind() == Action::GoTo);
	return Result.front().getGoToState();
	}

	// Returns the actions stored for symbol ID in state Src, or an empty range
	// if there are none.
	// StateOffset[Src]..StateOffset[Src + 1] delimits the state's slice of the
	// parallel Symbols/Actions arrays; within that slice Symbols is sorted
	// (asserted), so the matching run is located by binary search.
	llvm::ArrayRef<LRTable::Action> LRTable::find(StateID Src, SymbolID ID) const {
	assert(Src + 1u < StateOffset.size());
	std::pair<size_t, size_t> Range =
	std::make_pair(StateOffset[Src], StateOffset[Src + 1]);
	auto SymbolRange = llvm::makeArrayRef(Symbols.data() + Range.first,
	Symbols.data() + Range.second);

	assert(llvm::is_sorted(SymbolRange) &&
	"subrange of the Symbols should be sorted!");
	// Start is the first position whose symbol is not less than ID.
	const LRTable::StateID *Start =
	llvm::partition_point(SymbolRange, [&ID](SymbolID S) { return S < ID; });
	if (Start == SymbolRange.end())
	return {};
	// Extend End over the run of entries equal to ID. If *Start != ID the run
	// is empty and a zero-length range is returned.
	const LRTable::StateID *End = Start;
	while (End != SymbolRange.end() && *End == ID)
	++End;
	// Positions in Symbols map one-to-one onto positions in Actions.
	return llvm::makeArrayRef(&Actions[Start - Symbols.data()],
	/length=/End - Start);
	}

	// Returns the start state registered for the symbol Target.
	// StartStates is a list of (symbol, state) pairs that is asserted to be
	// sorted; the entry is located by binary search on the symbol. An assert
	// checks that an entry for Target exists; with asserts disabled a missing
	// Target is not detected before It is dereferenced.
	LRTable::StateID LRTable::getStartState(SymbolID Target) const {			LRTable::StateID LRTable::getStartState(SymbolID Target) const {
	assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!");			assert(llvm::is_sorted(StartStates) && "StartStates must be sorted!");
	auto It = llvm::partition_point(			auto It = llvm::partition_point(
	StartStates, [Target](const std::pair<SymbolID, StateID> &X) {			StartStates, [Target](const std::pair<SymbolID, StateID> &X) {
	return X.first < Target;			return X.first < Target;
	});			});
	assert(It != StartStates.end() && It->first == Target &&			assert(It != StartStates.end() && It->first == Target &&
	"target symbol doesn't have a start state!");			"target symbol doesn't have a start state!");
	return It->second;			return It->second;
	}			}

				// Constructs the table from a Builder (new side of this diff only).
				// Shift, Reduce, GoTo and StartStates are moved out of B; Reduce is
				// additionally given B.StateCount. The asserts check that every key in
				// Shift and Goto refers to a state below B.StateCount.
				LRTable::LRTable(Builder B)
				: Shift(std::move(B.Shift)), Reduce(std::move(B.Reduce), B.StateCount),
				Goto(std::move(B.GoTo)), StartStates(std::move(B.StartStates)) {
				assert(llvm::all_of(Shift,
				[&](auto &E) { return E.first.State < B.StateCount; }));
				assert(llvm::all_of(Goto,
				[&](auto &E) { return E.first.State < B.StateCount; }));
				}

	} // namespace pseudo			} // namespace pseudo
	} // namespace clang			} // namespace clang

clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp

	//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------- C++--===//			//===--- LRTableBuild.cpp - Build a LRTable from LRGraph ---------- C++--===//
	//			//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.			// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.			// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception			// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//			//
	//===----------------------------------------------------------------------===//			//===----------------------------------------------------------------------===//

	#include "clang-pseudo/grammar/Grammar.h"			#include "clang-pseudo/grammar/Grammar.h"
	#include "clang-pseudo/grammar/LRGraph.h"			#include "clang-pseudo/grammar/LRGraph.h"
	#include "clang-pseudo/grammar/LRTable.h"			#include "clang-pseudo/grammar/LRTable.h"
	#include "clang/Basic/TokenKinds.h"
	#include <cstdint>			#include <cstdint>

	// DenseMapInfo specialization for LRTable::Entry (old side of this diff
	// only). It provides the empty and tombstone keys, the hash, and the
	// equality predicate; elsewhere in this diff the Builder stores entries in
	// an llvm::DenseSet<Entry>.
	namespace llvm {
	template <> struct DenseMapInfo<clang::pseudo::LRTable::Entry> {
	using Entry = clang::pseudo::LRTable::Entry;
	// The empty and tombstone keys are distinguished by their first field (-1 vs
	// -2); both carry a sentinel Action.
	// NOTE(review): other code in this diff brace-initializes Entry as
	// {State, Symbol, Action}. If that is the field order, the value cast to
	// SymbolID here initializes State, not Symbol. Confirm against the Entry
	// declaration.
	static inline Entry getEmptyKey() {
	static Entry E{static_cast<clang::pseudo::SymbolID>(-1), 0,
	clang::pseudo::LRTable::Action::sentinel()};
	return E;
	}
	static inline Entry getTombstoneKey() {
	static Entry E{static_cast<clang::pseudo::SymbolID>(-2), 0,
	clang::pseudo::LRTable::Action::sentinel()};
	return E;
	}
	// Hash and equality both cover State, Symbol and the Action.
	static unsigned getHashValue(const Entry &I) {
	return llvm::hash_combine(I.State, I.Symbol, I.Act.opaque());
	}
	static bool isEqual(const Entry &LHS, const Entry &RHS) {
	return LHS.State == RHS.State && LHS.Symbol == RHS.Symbol &&
	LHS.Act == RHS.Act;
	}
	};
	} // namespace llvm

	namespace clang {			namespace clang {
	namespace pseudo {			namespace pseudo {

	// NOTE: in this side-by-side view the left column is the removed
	// LRTable::Builder class and the right column is the beginning of the added
	// LRTable::buildLR0 function; the two are unrelated definitions that share
	// these lines.
	//
	// Left column: Builder collects a de-duplicated set of table entries plus
	// the start states, and build() flattens them into the LRTable's parallel
	// Actions/Symbols arrays with a per-state offset index.
	// Right column: buildLR0 builds the LR(0) graph, asserts that the number of
	// states fits in StateBits bits, and starts filling a Builder with the
	// graph's start states and state count.
	class LRTable::Builder {			LRTable LRTable::buildLR0(const Grammar &G) {
	public:			auto Graph = LRGraph::buildLR0(G);
	Builder(llvm::ArrayRef<std::pair<SymbolID, StateID>> StartStates)			assert(Graph.states().size() <= (1 << StateBits) &&
	: StartStates(StartStates) {}			"Graph states execceds the maximum limit!");

	// Adds E to the entry set; returns false if an equal entry was already
	// present.
	bool insert(Entry E) { return Entries.insert(std::move(E)).second; }
	LRTable build(const GrammarTable &GT, unsigned NumStates) && {
	// E.g. given the following parsing table with 3 states and 3 terminals:
	//
	// a b c
	// +-------+----+-------+-+
	// \|state0 \| \| s0,r0 \| \|
	// \|state1 \| acc\| \| \|
	// \|state2 \| \| r1 \| \|
	// +-------+----+-------+-+
	//
	// The final LRTable:
	// - StateOffset: [s0] = 0, [s1] = 2, [s2] = 3, [sentinel] = 4
	// - Symbols: [ b, b, a, b]
	// Actions: [ s0, r0, acc, r1]
	// ~~~~~~ range for state 0
	// ~~~~ range for state 1
	// ~~ range for state 2
	// First step, we sort all entries by (State, Symbol, Action).
	std::vector<Entry> Sorted(Entries.begin(), Entries.end());
	llvm::sort(Sorted, [](const Entry &L, const Entry &R) {
	return std::forward_as_tuple(L.State, L.Symbol, L.Act.opaque()) <
	std::forward_as_tuple(R.State, R.Symbol, R.Act.opaque());
	});

	LRTable Table;
	Table.Actions.reserve(Sorted.size());
	Table.Symbols.reserve(Sorted.size());
	// We are good to finalize the States and Actions.
	for (const auto &E : Sorted) {
	Table.Actions.push_back(E.Act);
	Table.Symbols.push_back(E.Symbol);
	}
	// Initialize the terminal and nonterminal offset, all ranges are empty by
	// default.
	Table.StateOffset = std::vector<uint32_t>(NumStates + 1, 0);
	// StateOffset[S] becomes the index of the first sorted entry belonging to
	// state S; the extra final slot records where the last state's range ends.
	size_t SortedIndex = 0;
	for (StateID State = 0; State < Table.StateOffset.size(); ++State) {
	Table.StateOffset[State] = SortedIndex;
	while (SortedIndex < Sorted.size() && Sorted[SortedIndex].State == State)
	++SortedIndex;
	}
	Table.StartStates = std::move(StartStates);
	return Table;
	}

	private:			Builder B;
	llvm::DenseSet<Entry> Entries;			B.StartStates = Graph.startStates();
	std::vector<std::pair<SymbolID, StateID>> StartStates;			B.StateCount = Graph.states().size();
	};

	// Builds an LRTable directly from an explicit list of entries (old side of
	// this diff only). No start states are supplied. The number of states is
	// taken to be the largest State mentioned in Entries plus one, so an empty
	// list yields a table with a single state.
	LRTable LRTable::buildForTests(const GrammarTable &GT,
	llvm::ArrayRef<Entry> Entries) {
	StateID MaxState = 0;
	for (const auto &Entry : Entries)
	MaxState = std::max(MaxState, Entry.State);
	Builder Build({});
	for (const Entry &E : Entries)
	Build.insert(E);
	return std::move(Build).build(GT, /NumStates=/MaxState + 1);
	}

	// NOTE: in this side-by-side view the left column is the removed
	// LRTable::buildSLR and the right column is the remainder of the added
	// LRTable::buildLR0.
	//
	// Both sides turn every edge of the LR(0) graph into a shift (token label)
	// or goto (nonterminal label) entry, and skip the completed `_ := ...` rule.
	// They differ in how reductions are recorded for a completed item:
	// - Left (SLR): one Reduce entry per token in the follow set of the rule's
	//   target, so a reduce applies only for those lookahead tokens.
	// - Right (LR0): a single (state, rule) pair pushed onto B.Reduce, with no
	//   lookahead token attached.
	LRTable LRTable::buildSLR(const Grammar &G) {
	auto Graph = LRGraph::buildLR0(G);
	Builder Build(Graph.startStates());
	for (const auto &T : Graph.edges()) {			for (const auto &T : Graph.edges()) {
	Action Act = isToken(T.Label) ? Action::shift(T.Dst) : Action::goTo(T.Dst);			(isToken(T.Label) ? B.Shift : B.GoTo)
	Build.insert({T.Src, T.Label, Act});			.try_emplace(StateSymbol{T.Src, T.Label}, T.Dst);
	}			}
	assert(Graph.states().size() <= (1 << StateBits) &&
	"Graph states execceds the maximum limit!");
	auto FollowSets = followSets(G);
	for (StateID SID = 0; SID < Graph.states().size(); ++SID) {			for (StateID SID = 0; SID < Graph.states().size(); ++SID) {
	for (const Item &I : Graph.states()[SID].Items) {			for (const Item &I : Graph.states()[SID].Items) {
	// If we've just parsed the start symbol, this means we successfully parse
	// the input. We don't add the reduce action of `_ := start_symbol` in the
	// LRTable (the GLR parser handles it specifically).
	if (G.lookupRule(I.rule()).Target == G.underscore() && !I.hasNext())
	continue;
	if (!I.hasNext()) {			if (!I.hasNext()) {
	// If we've reached the end of a rule A := ..., then we can reduce if			// If we've just parsed the start symbol, this means we successfully
	// the next token is in the follow set of A.			// parse the input. We don't add the reduce action of `_ :=
	for (SymbolID Follow : FollowSets[G.lookupRule(I.rule()).Target]) {			// start_symbol` in the LRTable (the GLR parser handles it
	assert(isToken(Follow));			// specifically).
	Build.insert({SID, Follow, Action::reduce(I.rule())});			if (G.lookupRule(I.rule()).Target == G.underscore())
	}			continue;
				// If we've reached the end of a rule A := ..., then we can reduce.
				B.Reduce.push_back({SID, I.rule()});
	}			}
	}			}
	}			}
	return std::move(Build).build(G.table(), Graph.states().size());			return LRTable(std::move(B));
	}			}

	} // namespace pseudo			} // namespace pseudo
	} // namespace clang			} // namespace clang

clang-tools-extra/pseudo/test/lr-build-basic.test

	Show All 17 Lines
	# RUN: clang-pseudo -grammar %s -print-table \| FileCheck %s --check-prefix=TABLE			# RUN: clang-pseudo -grammar %s -print-table \| FileCheck %s --check-prefix=TABLE
	# TABLE: LRTable:			# TABLE: LRTable:
	# TABLE-NEXT: State 0			# TABLE-NEXT: State 0
	# TABLE-NEXT: 'IDENTIFIER': shift state 3			# TABLE-NEXT: 'IDENTIFIER': shift state 3
	# TABLE-NEXT: 'expr': go to state 1			# TABLE-NEXT: 'expr': go to state 1
	# TABLE-NEXT: 'id': go to state 2			# TABLE-NEXT: 'id': go to state 2
	# TABLE-NEXT: State 1			# TABLE-NEXT: State 1
	# TABLE-NEXT: State 2			# TABLE-NEXT: State 2
	# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := id'			# TABLE-NEXT: reduce by rule 1 'expr := id'
	# TABLE-NEXT: State 3			# TABLE-NEXT: State 3
	# TABLE-NEXT: 'EOF': reduce by rule 0 'id := IDENTIFIER'			# TABLE-NEXT: reduce by rule 0 'id := IDENTIFIER'

clang-tools-extra/pseudo/test/lr-build-conflicts.test

	Show All 29 Lines
	# RUN: clang-pseudo -grammar %s -print-table \| FileCheck %s --check-prefix=TABLE			# RUN: clang-pseudo -grammar %s -print-table \| FileCheck %s --check-prefix=TABLE
	# TABLE: LRTable:			# TABLE: LRTable:
	# TABLE-NEXT: State 0			# TABLE-NEXT: State 0
	# TABLE-NEXT: 'IDENTIFIER': shift state 2			# TABLE-NEXT: 'IDENTIFIER': shift state 2
	# TABLE-NEXT: 'expr': go to state 1			# TABLE-NEXT: 'expr': go to state 1
	# TABLE-NEXT: State 1			# TABLE-NEXT: State 1
	# TABLE-NEXT: '-': shift state 3			# TABLE-NEXT: '-': shift state 3
	# TABLE-NEXT: State 2			# TABLE-NEXT: State 2
	# TABLE-NEXT: 'EOF': reduce by rule 1 'expr := IDENTIFIER'			# TABLE-NEXT: reduce by rule 1 'expr := IDENTIFIER'
	# TABLE-NEXT: '-': reduce by rule 1 'expr := IDENTIFIER'
	# TABLE-NEXT: State 3			# TABLE-NEXT: State 3
	# TABLE-NEXT: 'IDENTIFIER': shift state 2			# TABLE-NEXT: 'IDENTIFIER': shift state 2
	# TABLE-NEXT: 'expr': go to state 4			# TABLE-NEXT: 'expr': go to state 4
	# TABLE-NEXT: State 4			# TABLE-NEXT: State 4
	# TABLE-NEXT: 'EOF': reduce by rule 0 'expr := expr - expr'			# TABLE-NEXT: reduce by rule 0 'expr := expr - expr'
	# TABLE-NEXT: '-': shift state 3			# TABLE-NEXT: '-': shift state 3
	# TABLE-NEXT: '-': reduce by rule 0 'expr := expr - expr'

clang-tools-extra/pseudo/tool/ClangPseudo.cpp

Show First 20 Lines • Show All 102 Lines • ▼ Show 20 Lines	if (!Diags.empty()) {
return 2;		return 2;
}		}
llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",		llvm::outs() << llvm::formatv("grammar file {0} is parsed successfully\n",
Grammar);		Grammar);
if (PrintGrammar)		if (PrintGrammar)
llvm::outs() << G->dump();		llvm::outs() << G->dump();
if (PrintGraph)		if (PrintGraph)
llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G);		llvm::outs() << clang::pseudo::LRGraph::buildLR0(G).dumpForTests(G);
auto LRTable = clang::pseudo::LRTable::buildSLR(*G);		auto LRTable = clang::pseudo::LRTable::buildLR0(*G);
if (PrintTable)		if (PrintTable)
llvm::outs() << LRTable.dumpForTests(*G);		llvm::outs() << LRTable.dumpForTests(*G);
if (PrintStatistics)		if (PrintStatistics)
llvm::outs() << LRTable.dumpStatistics();		llvm::outs() << LRTable.dumpStatistics();

if (ParseableStream) {		if (ParseableStream) {
clang::pseudo::ForestArena Arena;		clang::pseudo::ForestArena Arena;
clang::pseudo::GSS GSS;		clang::pseudo::GSS GSS;
Show All 24 Lines

clang-tools-extra/pseudo/unittests/GLRTest.cpp

Show All 23 Lines	llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
const std::vector<const GSS::Node *> &Heads) {		const std::vector<const GSS::Node *> &Heads) {
for (const auto *Head : Heads)		for (const auto *Head : Heads)
OS << *Head << "\n";		OS << *Head << "\n";
return OS;		return OS;
}		}

namespace {		namespace {

using Action = LRTable::Action;
using testing::AllOf;		using testing::AllOf;
		using testing::ElementsAre;
		using testing::UnorderedElementsAre;

// gMock matchers over GSS node pointers (each dereferences arg with ->):
// - state(S): the node's State equals S.
// - parsedSymbol(F): the node's Payload is the forest node pointer F.
// - parsedSymbolID(SID): the symbol of the node's Payload equals SID.
MATCHER_P(state, StateID, "") { return arg->State == StateID; }		MATCHER_P(state, StateID, "") { return arg->State == StateID; }
MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; }		MATCHER_P(parsedSymbol, FNode, "") { return arg->Payload == FNode; }
MATCHER_P(parsedSymbolID, SID, "") { return arg->Payload->symbol() == SID; }		MATCHER_P(parsedSymbolID, SID, "") { return arg->Payload->symbol() == SID; }

testing::Matcher<const GSS::Node *>		testing::Matcher<const GSS::Node *>
parents(llvm::ArrayRef<const GSS::Node *> Parents) {		parents(llvm::ArrayRef<const GSS::Node *> Parents) {
return testing::Property(&GSS::Node::parents,		return testing::Property(&GSS::Node::parents,
Show All 36 Lines	RuleID ruleFor(llvm::StringRef NonterminalName) const {
if (RuleRange.End - RuleRange.Start == 1)		if (RuleRange.End - RuleRange.Start == 1)
return G->table().Nonterminals[id(NonterminalName)].RuleRange.Start;		return G->table().Nonterminals[id(NonterminalName)].RuleRange.Start;
ADD_FAILURE() << "Expected a single rule for " << NonterminalName		ADD_FAILURE() << "Expected a single rule for " << NonterminalName
<< ", but it has " << RuleRange.End - RuleRange.Start		<< ", but it has " << RuleRange.End - RuleRange.Start
<< " rule!\n";		<< " rule!\n";
return 0;		return 0;
}		}

// Returns a callback that appends every head it is given to NewHeadResults
// (old side of this diff only). The lambda captures `this`, so the callback
// must not be used after the fixture is destroyed.
NewHeadCallback captureNewHeads() {
return [this](const GSS::Node *NewHead) {
NewHeadResults.push_back(NewHead);
};
};

protected:		protected:
std::unique_ptr<Grammar> G;		std::unique_ptr<Grammar> G;
ForestArena Arena;		ForestArena Arena;
GSS GSStack;		GSS GSStack;
std::vector<const GSS::Node*> NewHeadResults;
};		};

// Checks that glrShift merges heads which shift to the same state into one
// new GSS node with several parents.
// Old side of this diff: the shift targets are passed explicitly as ParseSteps
// and the new heads are collected through captureNewHeads().
// New side: the shift targets come from an LRTable built with
// LRTable::Builder, the heads are given distinct states 1, 2, 3 so the table
// can tell them apart, and the new heads are written to a NewHeads vector.
TEST_F(GLRTest, ShiftMergingHeads) {		TEST_F(GLRTest, ShiftMergingHeads) {
// Given a test case where we have three heads 1, 2, 3 in the GSS, the heads 1,		// Given a test case where we have three heads 1, 2, 3 in the GSS, the heads 1,
// 2 have shift actions to reach state 4, and the head 3 has a shift action to		// 2 have shift actions to reach state 4, and the head 3 has a shift action to
// reach state 5:		// reach state 5:
// 0--1		// 0--1
// └--2		// └--2
// └--3		// └--3
// After the shift action, the GSS (with new heads 4, 5) is:		// After the shift action, the GSS (with new heads 4, 5) is:
// 0---1---4		// 0---1---4
// └---2---┘		// └---2---┘
// └---3---5		// └---3---5
		LRTable::Builder LR;
		LR.StateCount = 6;
		LR.Shift[{1, tokenSymbol(tok::semi)}] = 4;
		LR.Shift[{2, tokenSymbol(tok::semi)}] = 4;
		LR.Shift[{3, tokenSymbol(tok::semi)}] = 5;

auto *GSSNode0 =		auto *GSSNode0 =
GSStack.addNode(/State=/0, /ForestNode=/nullptr, /Parents=/{});		GSStack.addNode(/State=/0, /ForestNode=/nullptr, /Parents=/{});
auto GSSNode1 = GSStack.addNode(/State=/0, /ForestNode=*/nullptr,		auto GSSNode1 = GSStack.addNode(/State=/1, /ForestNode=*/nullptr,
/Parents=/{GSSNode0});		/Parents=/{GSSNode0});
auto GSSNode2 = GSStack.addNode(/State=/0, /ForestNode=*/nullptr,		auto GSSNode2 = GSStack.addNode(/State=/2, /ForestNode=*/nullptr,
/Parents=/{GSSNode0});		/Parents=/{GSSNode0});
auto GSSNode3 = GSStack.addNode(/State=/0, /ForestNode=*/nullptr,		auto GSSNode3 = GSStack.addNode(/State=/3, /ForestNode=*/nullptr,
/Parents=/{GSSNode0});		/Parents=/{GSSNode0});

buildGrammar({}, {}); // Create a fake empty grammar.		buildGrammar({}, {}); // Create a fake empty grammar.
LRTable T = LRTable::buildForTests(G->table(), /Entries=/{});		LRTable T = std::move(LR).build();

ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);		ForestNode &SemiTerminal = Arena.createTerminal(tok::semi, 0);
std::vector<ParseStep> PendingShift = {		std::vector<const GSS::Node *> NewHeads;
{GSSNode1, Action::shift(4)},		glrShift({GSSNode1, GSSNode3, GSSNode2}, SemiTerminal,
{GSSNode3, Action::shift(5)},		{*G, T, Arena, GSStack}, NewHeads);
{GSSNode2, Action::shift(4)},
};
glrShift(PendingShift, SemiTerminal, {*G, T, Arena, GSStack},
captureNewHeads());

EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(		EXPECT_THAT(NewHeads, UnorderedElementsAre(
AllOf(state(4), parsedSymbol(&SemiTerminal),		AllOf(state(4), parsedSymbol(&SemiTerminal),
parents({GSSNode1, GSSNode2})),		parents({GSSNode1, GSSNode2})),
AllOf(state(5), parsedSymbol(&SemiTerminal),		AllOf(state(5), parsedSymbol(&SemiTerminal),
parents({GSSNode3}))))		parents({GSSNode3}))))
<< NewHeadResults;		<< NewHeads;
}		}

TEST_F(GLRTest, ReduceConflictsSplitting) {		TEST_F(GLRTest, ReduceConflictsSplitting) {
// Before (splitting due to R/R conflict):		// Before (splitting due to R/R conflict):
// 0--1(IDENTIFIER)		// 0--1(IDENTIFIER)
// After reducing 1 by `class-name := IDENTIFIER` and		// After reducing 1 by `class-name := IDENTIFIER` and
// `enum-name := IDENTIFIER`:		// `enum-name := IDENTIFIER`:
// 0--2(class-name) // 2 is goto(0, class-name)		// 0--2(class-name) // 2 is goto(0, class-name)
// └--3(enum-name) // 3 is goto(0, enum-name)		// └--3(enum-name) // 3 is goto(0, enum-name)
buildGrammar({"class-name", "enum-name"},		buildGrammar({"class-name", "enum-name"},
{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});		{"class-name := IDENTIFIER", "enum-name := IDENTIFIER"});

LRTable Table = LRTable::buildForTests(		LRTable::Builder LR;
G->table(), {{/State=/0, id("class-name"), Action::goTo(2)},		LR.StateCount = 4;
{/State=/0, id("enum-name"), Action::goTo(3)}});		LR.GoTo[{0, id("class-name")}] = 2;
		LR.GoTo[{0, id("enum-name")}] = 3;
		LR.Reduce.push_back({1, ruleFor("class-name")});
		LR.Reduce.push_back({1, ruleFor("enum-name")});
		LRTable Table = std::move(LR).build();

const auto *GSSNode0 =		const auto *GSSNode0 =
GSStack.addNode(/State=/0, /ForestNode=/nullptr, /Parents=/{});		GSStack.addNode(/State=/0, /ForestNode=/nullptr, /Parents=/{});
const auto *GSSNode1 =		const auto *GSSNode1 =
GSStack.addNode(3, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});		GSStack.addNode(1, &Arena.createTerminal(tok::identifier, 0), {GSSNode0});

std::vector<ParseStep> PendingReduce = {		std::vector<const GSS::Node *> Heads = {GSSNode1};
{GSSNode1, Action::reduce(ruleFor("class-name"))},		glrReduce(Heads, {*G, Table, Arena, GSStack});
{GSSNode1, Action::reduce(ruleFor("enum-name"))}};		EXPECT_THAT(Heads, UnorderedElementsAre(
glrReduce(PendingReduce, {*G, Table, Arena, GSStack},		GSSNode1,
captureNewHeads());
EXPECT_THAT(NewHeadResults,
testing::UnorderedElementsAre(
AllOf(state(2), parsedSymbolID(id("class-name")),		AllOf(state(2), parsedSymbolID(id("class-name")),
parents({GSSNode0})),		parents({GSSNode0})),
AllOf(state(3), parsedSymbolID(id("enum-name")),		AllOf(state(3), parsedSymbolID(id("enum-name")),
parents({GSSNode0})))) << NewHeadResults;		parents({GSSNode0}))))
		<< Heads;
}		}

TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {		TEST_F(GLRTest, ReduceSplittingDueToMultipleBases) {
// Before (splitting due to multiple bases):		// Before (splitting due to multiple bases):
// 2(class-name)--4(*)		// 2(class-name)--4(*)
// 3(enum-name)---┘		// 3(enum-name)---┘
// After reducing 4 by `ptr-operator := *`:		// After reducing 4 by `ptr-operator := *`:
// 2(class-name)--5(ptr-operator) // 5 is goto(2, ptr-operator)		// 2(class-name)--5(ptr-operator) // 5 is goto(2, ptr-operator)
// 3(enum-name)---6(ptr-operator) // 6 is goto(3, ptr-operator)		// 3(enum-name)---6(ptr-operator) // 6 is goto(3, ptr-operator)
buildGrammar({"ptr-operator", "class-name", "enum-name"},		buildGrammar({"ptr-operator", "class-name", "enum-name"},
{"ptr-operator := *"});		{"ptr-operator := *"});

auto ClassNameNode = &Arena.createOpaque(id("class-name"), /TokenIndex=*/0);		auto ClassNameNode = &Arena.createOpaque(id("class-name"), /TokenIndex=*/0);
auto EnumNameNode = &Arena.createOpaque(id("enum-name"), /TokenIndex=*/0);		auto EnumNameNode = &Arena.createOpaque(id("enum-name"), /TokenIndex=*/0);

const auto *GSSNode2 =		const auto *GSSNode2 =
GSStack.addNode(/State=/2, /ForestNode=/ClassNameNode, /Parents=/{});		GSStack.addNode(/State=/2, /ForestNode=/ClassNameNode, /Parents=/{});
const auto *GSSNode3 =		const auto *GSSNode3 =
GSStack.addNode(/State=/3, /ForestNode=/EnumNameNode, /Parents=/{});		GSStack.addNode(/State=/3, /ForestNode=/EnumNameNode, /Parents=/{});
const auto *GSSNode4 = GSStack.addNode(		const auto *GSSNode4 = GSStack.addNode(
/State=/4, &Arena.createTerminal(tok::star, /TokenIndex=/1),		/State=/4, &Arena.createTerminal(tok::star, /TokenIndex=/1),
/Parents=/{GSSNode2, GSSNode3});		/Parents=/{GSSNode2, GSSNode3});

LRTable Table = LRTable::buildForTests(		LRTable::Builder LR;
G->table(),		LR.StateCount = 7;
{{/State=/2, id("ptr-operator"), Action::goTo(/NextState=/5)},		LR.GoTo[{2, id("ptr-operator")}] = 5;
{/State=/3, id("ptr-operator"), Action::goTo(/NextState=/6)}});		LR.GoTo[{3, id("ptr-operator")}] = 6;
std::vector<ParseStep> PendingReduce = {		LR.Reduce.push_back({4, ruleFor("ptr-operator")});
{GSSNode4, Action::reduce(ruleFor("ptr-operator"))}};		LRTable Table = std::move(LR).build();
glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
captureNewHeads());

EXPECT_THAT(NewHeadResults,		std::vector<const GSS::Node *> Heads = {GSSNode4};
testing::UnorderedElementsAre(		glrReduce(Heads, {*G, Table, Arena, GSStack});

		EXPECT_THAT(Heads, UnorderedElementsAre(
		GSSNode4,
AllOf(state(5), parsedSymbolID(id("ptr-operator")),		AllOf(state(5), parsedSymbolID(id("ptr-operator")),
parents({GSSNode2})),		parents({GSSNode2})),
AllOf(state(6), parsedSymbolID(id("ptr-operator")),		AllOf(state(6), parsedSymbolID(id("ptr-operator")),
parents({GSSNode3})))) << NewHeadResults;		parents({GSSNode3}))))
		<< Heads;
// Verify that the payload of the two new heads is shared, only a single		// Verify that the payload of the two new heads is shared, only a single
// ptr-operator node is created in the forest.		// ptr-operator node is created in the forest.
EXPECT_EQ(NewHeadResults[0]->Payload, NewHeadResults[1]->Payload);		EXPECT_EQ(Heads[1]->Payload, Heads[2]->Payload);
}		}

TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {		TEST_F(GLRTest, ReduceJoiningWithMultipleBases) {
// Before (joining due to same goto state, multiple bases):		// Before (joining due to same goto state, multiple bases):
// 0--1(cv-qualifier)--3(class-name)		// 0--1(cv-qualifier)--3(class-name)
// └--2(cv-qualifier)--4(enum-name)		// └--2(cv-qualifier)--4(enum-name)
// After reducing 3 by `type-name := class-name` and		// After reducing 3 by `type-name := class-name` and
// 4 by `type-name := enum-name`:		// 4 by `type-name := enum-name`:
Show All 15 Lines	const auto *GSSNode2 = GSStack.addNode(
/State=/2, /ForestNode=/CVQualifierNode, /Parents=/{GSSNode0});		/State=/2, /ForestNode=/CVQualifierNode, /Parents=/{GSSNode0});
const auto *GSSNode3 =		const auto *GSSNode3 =
GSStack.addNode(/State=/3, /ForestNode=/ClassNameNode,		GSStack.addNode(/State=/3, /ForestNode=/ClassNameNode,
/Parents=/{GSSNode1});		/Parents=/{GSSNode1});
const auto *GSSNode4 =		const auto *GSSNode4 =
GSStack.addNode(/State=/4, /ForestNode=/EnumNameNode,		GSStack.addNode(/State=/4, /ForestNode=/EnumNameNode,
/Parents=/{GSSNode2});		/Parents=/{GSSNode2});

LRTable Table = LRTable::buildForTests(		LRTable::Builder LR;
G->table(),		LR.StateCount = 6;
{{/State=/1, id("type-name"), Action::goTo(/NextState=/5)},		LR.GoTo[{1, id("type-name")}] = 5;
{/State=/2, id("type-name"), Action::goTo(/NextState=/5)}});		LR.GoTo[{2, id("type-name")}] = 5;
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!		// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
std::vector<ParseStep> PendingReduce = {		LR.Reduce.push_back({3, /* type-name := class-name */0});
{		LR.Reduce.push_back({4, /* type-name := enum-name */1});
GSSNode3, Action::reduce(/RuleID=/0) // type-name := class-name		LRTable Table = std::move(LR).build();
},
{		std::vector<const GSS::Node *> Heads = {GSSNode3, GSSNode4};
GSSNode4, Action::reduce(/RuleID=/1) // type-name := enum-name		glrReduce(Heads, {*G, Table, Arena, GSStack});
}};
glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
captureNewHeads());

// Verify that the stack heads are joined at state 5 after reduces.		// Verify that the stack heads are joined at state 5 after reduces.
EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(AllOf(		EXPECT_THAT(Heads, ElementsAre(GSSNode3, GSSNode4,
state(5), parsedSymbolID(id("type-name")),		AllOf(state(5),
		parsedSymbolID(id("type-name")),
parents({GSSNode1, GSSNode2}))))		parents({GSSNode1, GSSNode2}))))
<< NewHeadResults;		<< Heads;
// Verify that we create an ambiguous ForestNode of two parses of `type-name`.		// Verify that we create an ambiguous ForestNode of two parses of `type-name`.
EXPECT_EQ(NewHeadResults.front()->Payload->dumpRecursive(*G),		EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
"[ 1, end) type-name := <ambiguous>\n"		"[ 1, end) type-name := <ambiguous>\n"
"[ 1, end) ├─type-name := class-name\n"		"[ 1, end) ├─type-name := class-name\n"
"[ 1, end) │ └─class-name := <opaque>\n"		"[ 1, end) │ └─class-name := <opaque>\n"
"[ 1, end) └─type-name := enum-name\n"		"[ 1, end) └─type-name := enum-name\n"
"[ 1, end) └─enum-name := <opaque>\n");		"[ 1, end) └─enum-name := <opaque>\n");
}		}

TEST_F(GLRTest, ReduceJoiningWithSameBase) {		TEST_F(GLRTest, ReduceJoiningWithSameBase) {
Show All 19 Lines	const auto *GSSNode2 =
GSStack.addNode(/State=/2, /ForestNode=/EnumNameNode,		GSStack.addNode(/State=/2, /ForestNode=/EnumNameNode,
/Parents=/{GSSNode0});		/Parents=/{GSSNode0});
const auto *GSSNode3 =		const auto *GSSNode3 =
GSStack.addNode(/State=/3, /ForestNode=/StartTerminal,		GSStack.addNode(/State=/3, /ForestNode=/StartTerminal,
/Parents=/{GSSNode1});		/Parents=/{GSSNode1});
const auto *GSSNode4 =		const auto *GSSNode4 =
GSStack.addNode(/State=/4, /ForestNode=/StartTerminal,		GSStack.addNode(/State=/4, /ForestNode=/StartTerminal,
/Parents=/{GSSNode2});		/Parents=/{GSSNode2});
		LRTable::Builder LR;
LRTable Table = LRTable::buildForTests(		LR.StateCount = 6;
G->table(), {{/State=/0, id("pointer"), Action::goTo(5)}});		LR.GoTo[{0, id("pointer")}] = 5;
// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!		// FIXME: figure out a way to get rid of the hard-coded reduce RuleID!
std::vector<ParseStep> PendingReduce = {		LR.Reduce.push_back({3, /* pointer := class-name */0});
{		LR.Reduce.push_back({4, /* pointer := enum-name */1});
GSSNode3, Action::reduce(/RuleID=/0) // pointer := class-name *		LRTable Table = std::move(LR).build();
},
{		std::vector<const GSS::Node *> Heads = { GSSNode3, GSSNode4 };
GSSNode4, Action::reduce(/RuleID=/1) // pointer := enum-name *		glrReduce(Heads, {*G, Table, Arena, GSStack});
}};
glrReduce(PendingReduce, {*G, Table, Arena, GSStack},
captureNewHeads());

EXPECT_THAT(NewHeadResults, testing::UnorderedElementsAre(		EXPECT_THAT(Heads, ElementsAre(GSSNode3, GSSNode4,
AllOf(state(5), parsedSymbolID(id("pointer")),		AllOf(state(5), parsedSymbolID(id("pointer")),
parents({GSSNode0}))))		parents({GSSNode0}))))
<< NewHeadResults;		<< Heads;
EXPECT_EQ(NewHeadResults.front()->Payload->dumpRecursive(*G),		EXPECT_EQ(Heads.back()->Payload->dumpRecursive(*G),
"[ 0, end) pointer := <ambiguous>\n"		"[ 0, end) pointer := <ambiguous>\n"
"[ 0, end) ├─pointer := class-name *\n"		"[ 0, end) ├─pointer := class-name *\n"
"[ 0, 1) │ ├─class-name := <opaque>\n"		"[ 0, 1) │ ├─class-name := <opaque>\n"
"[ 1, end) │ └─* := tok[1]\n"		"[ 1, end) │ └─* := tok[1]\n"
"[ 0, end) └─pointer := enum-name *\n"		"[ 0, end) └─pointer := enum-name *\n"
"[ 0, 1) ├─enum-name := <opaque>\n"		"[ 0, 1) ├─enum-name := <opaque>\n"
"[ 1, end) └─* := tok[1]\n");		"[ 1, end) └─* := tok[1]\n");
}		}
Show All 12 Lines	build(R"bnf(
test := { expr		test := { expr
test := { IDENTIFIER		test := { IDENTIFIER
test := left-paren expr		test := left-paren expr
left-paren := {		left-paren := {
expr := IDENTIFIER		expr := IDENTIFIER
)bnf");		)bnf");
clang::LangOptions LOptions;		clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);		const TokenStream &Tokens = cook(lex("{ abc", LOptions), LOptions);
auto LRTable = LRTable::buildSLR(*G);		auto LRTable = LRTable::buildLR0(*G);

const ForestNode &Parsed =		const ForestNode &Parsed =
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));		glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`		// Verify that there is no duplicated sequence node of `expr := IDENTIFIER`
// in the forest, see the `#1` and `=#1` in the dump string.		// in the forest, see the `#1` and `=#1` in the dump string.
EXPECT_EQ(Parsed.dumpRecursive(*G),		EXPECT_EQ(Parsed.dumpRecursive(*G),
"[ 0, end) test := <ambiguous>\n"		"[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := { expr\n"		"[ 0, end) ├─test := { expr\n"
Show All 21 Lines	build(R"bnf(
_ := test		_ := test

test := IDENTIFIER		test := IDENTIFIER
test := foo		test := foo
foo := IDENTIFIER		foo := IDENTIFIER
)bnf");		)bnf");
clang::LangOptions LOptions;		clang::LangOptions LOptions;
const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);		const TokenStream &Tokens = cook(lex("IDENTIFIER", LOptions), LOptions);
auto LRTable = LRTable::buildSLR(*G);		auto LRTable = LRTable::buildLR0(*G);

const ForestNode &Parsed =		const ForestNode &Parsed =
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));		glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
EXPECT_EQ(Parsed.dumpRecursive(*G),		EXPECT_EQ(Parsed.dumpRecursive(*G),
"[ 0, end) test := <ambiguous>\n"		"[ 0, end) test := <ambiguous>\n"
"[ 0, end) ├─test := IDENTIFIER\n"		"[ 0, end) ├─test := IDENTIFIER\n"
"[ 0, end) │ └─IDENTIFIER := tok[0]\n"		"[ 0, end) │ └─IDENTIFIER := tok[0]\n"
"[ 0, end) └─test := foo\n"		"[ 0, end) └─test := foo\n"
"[ 0, end) └─foo := IDENTIFIER\n"		"[ 0, end) └─foo := IDENTIFIER\n"
"[ 0, end) └─IDENTIFIER := tok[0]\n");		"[ 0, end) └─IDENTIFIER := tok[0]\n");
}		}

TEST_F(GLRTest, NoExplicitAccept) {		TEST_F(GLRTest, NoExplicitAccept) {
build(R"bnf(		build(R"bnf(
_ := test		_ := test

test := IDENTIFIER test		test := IDENTIFIER test
test := IDENTIFIER		test := IDENTIFIER
)bnf");		)bnf");
clang::LangOptions LOptions;		clang::LangOptions LOptions;
// Given the following input, and the grammar above, we perform two reductions		// Given the following input, and the grammar above, we perform two reductions
// of the nonterminal `test` when the next token is `eof`, verify that the		// of the nonterminal `test` when the next token is `eof`, verify that the
// parser stops at the right state.		// parser stops at the right state.
const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);		const TokenStream &Tokens = cook(lex("id id", LOptions), LOptions);
auto LRTable = LRTable::buildSLR(*G);		auto LRTable = LRTable::buildLR0(*G);

const ForestNode &Parsed =		const ForestNode &Parsed =
glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));		glrParse(Tokens, {*G, LRTable, Arena, GSStack}, id("test"));
EXPECT_EQ(Parsed.dumpRecursive(*G),		EXPECT_EQ(Parsed.dumpRecursive(*G),
"[ 0, end) test := IDENTIFIER test\n"		"[ 0, end) test := IDENTIFIER test\n"
"[ 0, 1) ├─IDENTIFIER := tok[0]\n"		"[ 0, 1) ├─IDENTIFIER := tok[0]\n"
"[ 1, end) └─test := IDENTIFIER\n"		"[ 1, end) └─test := IDENTIFIER\n"
"[ 1, end) └─IDENTIFIER := tok[1]\n");		"[ 1, end) └─IDENTIFIER := tok[1]\n");
Show All 27 Lines

clang-tools-extra/pseudo/unittests/GrammarTest.cpp

Show First 20 Lines • Show All 136 Lines • ▼ Show 20 Lines	EXPECT_THAT(Diags, UnorderedElementsAre(
"No rules for nonterminal: undefined-sym",		"No rules for nonterminal: undefined-sym",
"Failed to parse 'invalid': no separator :=",		"Failed to parse 'invalid': no separator :=",
"Token-like name IDENFIFIE is used as a nonterminal",		"Token-like name IDENFIFIE is used as a nonterminal",
"No rules for nonterminal: IDENFIFIE",		"No rules for nonterminal: IDENFIFIE",
"The grammar contains a cycle involving symbol a",		"The grammar contains a cycle involving symbol a",
"Unknown attribute 'unknown'"));		"Unknown attribute 'unknown'"));
}		}

TEST_F(GrammarTest, FirstAndFollowSets) {
  // Pairs every per-symbol set with its SymbolID (its index in the vector) so
  // the expectations below can name symbols rather than rely on their order.
  using SymbolSet = llvm::DenseSet<SymbolID>;
  const auto ToPairs = [](std::vector<SymbolSet> PerSymbol) {
    std::vector<std::pair<SymbolID, SymbolSet>> Paired;
    SymbolID Next = 0;
    for (SymbolSet &Members : PerSymbol)
      Paired.emplace_back(Next++, std::move(Members));
    return Paired;
  };

  // Case 1: a small left-recursive expression grammar.
  build(
R"bnf(
_ := expr
expr := expr - term
expr := term
term := IDENTIFIER
term := ( expr )
)bnf");
  ASSERT_TRUE(Diags.empty());
  {
    // Every nonterminal can only start with an identifier or a '('.
    EXPECT_THAT(
        ToPairs(firstSets(*G)),
        UnorderedElementsAre(
            Pair(id("_"), UnorderedElementsAre(id("IDENTIFIER"), id("("))),
            Pair(id("expr"), UnorderedElementsAre(id("IDENTIFIER"), id("("))),
            Pair(id("term"),
                 UnorderedElementsAre(id("IDENTIFIER"), id("(")))));
    // Only EOF follows the start symbol; '-', EOF and ')' follow the others.
    EXPECT_THAT(
        ToPairs(followSets(*G)),
        UnorderedElementsAre(
            Pair(id("_"), UnorderedElementsAre(id("EOF"))),
            Pair(id("expr"),
                 UnorderedElementsAre(id("-"), id("EOF"), id(")"))),
            Pair(id("term"),
                 UnorderedElementsAre(id("-"), id("EOF"), id(")")))));
  }

  // Case 2: a right-recursive sequence grammar; build() replaces the grammar,
  // so all id() lookups below refer to the new one.
  build(R"bnf(
# A simplfied C++ decl-specifier-seq.
_ := decl-specifier-seq
decl-specifier-seq := decl-specifier decl-specifier-seq
decl-specifier-seq := decl-specifier
decl-specifier := simple-type-specifier
decl-specifier := INLINE
simple-type-specifier := INT
)bnf");
  ASSERT_TRUE(Diags.empty());
  {
    EXPECT_THAT(
        ToPairs(firstSets(*G)),
        UnorderedElementsAre(
            Pair(id("_"), UnorderedElementsAre(id("INLINE"), id("INT"))),
            Pair(id("decl-specifier-seq"),
                 UnorderedElementsAre(id("INLINE"), id("INT"))),
            Pair(id("simple-type-specifier"),
                 UnorderedElementsAre(id("INT"))),
            Pair(id("decl-specifier"),
                 UnorderedElementsAre(id("INLINE"), id("INT")))));
    EXPECT_THAT(
        ToPairs(followSets(*G)),
        UnorderedElementsAre(
            Pair(id("_"), UnorderedElementsAre(id("EOF"))),
            Pair(id("decl-specifier-seq"), UnorderedElementsAre(id("EOF"))),
            Pair(id("decl-specifier"),
                 UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF"))),
            Pair(id("simple-type-specifier"),
                 UnorderedElementsAre(id("INLINE"), id("INT"), id("EOF")))));
  }
}

} // namespace		} // namespace
} // namespace pseudo		} // namespace pseudo
} // namespace clang		} // namespace clang

clang-tools-extra/pseudo/unittests/LRTableTest.cpp

	Show All 11 Lines
	#include "gmock/gmock.h"			#include "gmock/gmock.h"
	#include "gtest/gtest.h"			#include "gtest/gtest.h"
	#include <vector>			#include <vector>

	namespace clang {			namespace clang {
	namespace pseudo {			namespace pseudo {
	namespace {			namespace {

				using testing::ElementsAre;
	using testing::IsEmpty;			using testing::IsEmpty;
	using testing::UnorderedElementsAre;			using testing::UnorderedElementsAre;
	using Action = LRTable::Action;

	TEST(LRTable, Builder) {			TEST(LRTable, Builder) {
	GrammarTable GTable;			GrammarTable GT;

	// eof semi ...			// eof semi ...
	// +-------+----+-------+---			// +-------+----+-------+---
	// \|state0 \| \| s0,r0 \|...			// \|state0 \| \| s0,r0 \|...
	// \|state1 \| acc\| \|...			// \|state1 \| acc\| \|...
	// \|state2 \| \| r1 \|...			// \|state2 \| \| r1 \|...
	// +-------+----+-------+---			// +-------+----+-------+---
	std::vector<LRTable::Entry> Entries = {			LRTable::Builder Builder;
	{/* State */ 0, tokenSymbol(tok::semi), Action::shift(0)},			Builder.StateCount = 3;
	{/* State */ 0, tokenSymbol(tok::semi), Action::reduce(0)},			Builder.Shift[{/State=/0, tokenSymbol(tok::semi)}] = 0;
	{/* State */ 1, tokenSymbol(tok::eof), Action::reduce(2)},			Builder.Reduce.push_back({/State=/0, /Rule=/0});
	{/* State */ 2, tokenSymbol(tok::semi), Action::reduce(1)}};			Builder.Reduce.push_back({/State=/1, /Rule=/2});
	GrammarTable GT;			Builder.Reduce.push_back({/State=/2, /Rule=/1});
	LRTable T = LRTable::buildForTests(GT, Entries);			LRTable T = std::move(Builder).build();
	EXPECT_THAT(T.find(0, tokenSymbol(tok::eof)), IsEmpty());			EXPECT_EQ(T.getShiftState(0, tokenSymbol(tok::eof)), llvm::None);
	EXPECT_THAT(T.find(0, tokenSymbol(tok::semi)),			EXPECT_EQ(T.getShiftState(0, tokenSymbol(tok::semi)), LRTable::StateID{0});
	UnorderedElementsAre(Action::shift(0), Action::reduce(0)));			EXPECT_THAT(T.getReduceRules(0), ElementsAre(0));
	EXPECT_THAT(T.find(1, tokenSymbol(tok::eof)),			EXPECT_EQ(T.getShiftState(1, tokenSymbol(tok::semi)), llvm::None);
	UnorderedElementsAre(Action::reduce(2)));			EXPECT_THAT(T.getReduceRules(1), ElementsAre(2));
	EXPECT_THAT(T.find(1, tokenSymbol(tok::semi)), IsEmpty());			EXPECT_THAT(T.getReduceRules(2), ElementsAre(1));
	EXPECT_THAT(T.find(2, tokenSymbol(tok::semi)),
	UnorderedElementsAre(Action::reduce(1)));
	// Verify the behavior for other non-available-actions terminals.			// Verify the behavior for other non-available-actions terminals.
	EXPECT_THAT(T.find(2, tokenSymbol(tok::kw_int)), IsEmpty());			EXPECT_EQ(T.getShiftState(2, tokenSymbol(tok::kw_int)), llvm::None);
	}			}

	} // namespace			} // namespace
	} // namespace pseudo			} // namespace pseudo
	} // namespace clang			} // namespace clang

This is an archive of the discontinued LLVM Phabricator instance.

[pseudo] wip/prototype: use LR0 instead of SLR1 table
Needs Review | Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 438735

clang-tools-extra/pseudo/benchmarks/Benchmark.cpp

clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp

clang-tools-extra/pseudo/include/clang-pseudo/GLR.h

clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h

clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h

clang-tools-extra/pseudo/lib/GLR.cpp

clang-tools-extra/pseudo/lib/cxx/CXX.cpp

clang-tools-extra/pseudo/lib/grammar/Grammar.cpp

clang-tools-extra/pseudo/lib/grammar/LRTable.cpp

clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp

clang-tools-extra/pseudo/test/lr-build-basic.test

clang-tools-extra/pseudo/test/lr-build-conflicts.test

clang-tools-extra/pseudo/tool/ClangPseudo.cpp

clang-tools-extra/pseudo/unittests/GLRTest.cpp

clang-tools-extra/pseudo/unittests/GrammarTest.cpp

clang-tools-extra/pseudo/unittests/LRTableTest.cpp

This is an archive of the discontinued LLVM Phabricator instance.

[pseudo] wip/prototype: use LR0 instead of SLR1 table | Needs Review | Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 438735

clang-tools-extra/pseudo/benchmarks/Benchmark.cpp

clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp

clang-tools-extra/pseudo/include/clang-pseudo/GLR.h

clang-tools-extra/pseudo/include/clang-pseudo/grammar/Grammar.h

clang-tools-extra/pseudo/include/clang-pseudo/grammar/LRTable.h

clang-tools-extra/pseudo/lib/GLR.cpp

clang-tools-extra/pseudo/lib/cxx/CXX.cpp

clang-tools-extra/pseudo/lib/grammar/Grammar.cpp

clang-tools-extra/pseudo/lib/grammar/LRTable.cpp

clang-tools-extra/pseudo/lib/grammar/LRTableBuild.cpp

clang-tools-extra/pseudo/test/lr-build-basic.test

clang-tools-extra/pseudo/test/lr-build-conflicts.test

clang-tools-extra/pseudo/tool/ClangPseudo.cpp

clang-tools-extra/pseudo/unittests/GLRTest.cpp

clang-tools-extra/pseudo/unittests/GrammarTest.cpp

clang-tools-extra/pseudo/unittests/LRTableTest.cpp

[pseudo] wip/prototype: use LR0 instead of SLR1 table
Needs Review | Public