diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/Forest.h b/clang/include/clang/Tooling/Syntax/Pseudo/Forest.h
--- a/clang/include/clang/Tooling/Syntax/Pseudo/Forest.h
+++ b/clang/include/clang/Tooling/Syntax/Pseudo/Forest.h
@@ -81,6 +81,11 @@
     assert(elements.size() < (1 << (16 - RuleBits)));
     return rule | elements.size() << RuleBits;
   }
+  static uint16_t
+  AmbiguousData(llvm::ArrayRef<const ForestNode *> alternatives) {
+    return alternatives.size();
+  }
+
   Token::Index StartLoc;
   Kind K : 4;
   SymbolID Symbol : SymbolBits;
@@ -106,6 +111,14 @@
                   ForestNode::SequenceData(RID, Elements), Elements);
   }
 
+  ForestNode &createAmbiguous(SymbolID symbol,
+                              llvm::ArrayRef<const ForestNode *> alternatives) {
+    assert(!alternatives.empty());
+    return create(ForestNode::Ambiguous, symbol,
+                  alternatives.front()->startLoc(),
+                  ForestNode::AmbiguousData(alternatives), alternatives);
+  }
+
   void init(const TokenStream &Code);
   const ForestNode &terminal(Token::Index Index) const {
     assert(Terminals && "Terminals are not intialized!");
diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/GLRParser.h b/clang/include/clang/Tooling/Syntax/Pseudo/GLRParser.h
--- a/clang/include/clang/Tooling/Syntax/Pseudo/GLRParser.h
+++ b/clang/include/clang/Tooling/Syntax/Pseudo/GLRParser.h
@@ -81,9 +81,9 @@
   };
 
   // Creates a new node in the graph.
-  const Node *addNode(LRTable::StateID State,
-                      const ::clang::syntax::pseudo::ForestNode *Symbol,
-                      llvm::ArrayRef<const Node *> Predecessors) {
+  Node *addNode(LRTable::StateID State,
+                const ::clang::syntax::pseudo::ForestNode *Symbol,
+                llvm::ArrayRef<const Node *> Predecessors) {
     assert(Predecessors.size() < (1 << Node::PredecessorBits) &&
            "Too many predecessors to fit in PredecessorBits!");
     ++NodeCount;
diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h b/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
--- a/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
+++ b/clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h
@@ -158,6 +158,10 @@
 
   struct Nonterminal {
     std::string Name;
+    // Value of topological order of the nonterminal.
+    // For nonterminals A and B, if A := B (or transitively), then
+    // A.TopologicalOrder > B.TopologicalOrder.
+    unsigned TopologicalOrder = 0;
     // Corresponding rules that construct the non-terminal, it is a [start, end)
     // index range of the Rules table.
     struct {
diff --git a/clang/lib/Tooling/Syntax/Pseudo/GLRParser.cpp b/clang/lib/Tooling/Syntax/Pseudo/GLRParser.cpp
--- a/clang/lib/Tooling/Syntax/Pseudo/GLRParser.cpp
+++ b/clang/lib/Tooling/Syntax/Pseudo/GLRParser.cpp
@@ -18,6 +18,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/FormatVariadic.h"
 #include <memory>
+#include <queue>
 #include <tuple>
 
 #define DEBUG_TYPE "GLRParser.cpp"
@@ -156,72 +157,135 @@
   enumPath(Head, PathLength);
 }
 
-// Perform reduction recursively until we don't have reduce actions with
-// heads.
+// Perform reductions recursively until there is no available reductions.
+//
+// Reductions can manipulate the GSS in following way:
+//
+//  1) Split --
+//     1.1 when a stack head has mutiple reduce actions, the head is
+//     made to split to accommodate the various possiblities.
+//     E.g.
+//       0 -> 1 (ID)
+//     After performing reduce of production rules (class-name := ID,
+//     enum-name := ID), the GSS now has two new heads:
+//       0 -> 2 (class-name)
+//        `-> 3 (enum-name)
+//
+//     1.2 when a stack head has a reduce action with multiple bases, the head
+//     will be split if each base leads to different states.
+//     E.g.
+//       ... -> 1(...) -> 3 (INT)
+//                        ^
+//       ... -> 2(...) ---|
+//
+//     After the reduce action (simple-type-specifier := INT), the GSS looks
+//     like:
+//       ... -> 1(...) -> 4 (simple-type-specifier)
+//       ... -> 2(...) -> 5 (simple-type-specifier)
+//
+//  2) Merge -- if multiple heads turn out to be identical after
+//     reduction (new heads have the same state, and point to the same
+//     predecessors), these heads are merged and treated as a single head.
+//     This is usually where ambiguity happens.
+//
+//     E.g.
+//         0 -> 2 (class-name)
+//         ` -> 3 (enum-name)
+//     After reduction of rules (type-name := class-name | enum-name), the GSS
+//     has the following form:
+//         0 -> 4 (type-name)
+//     The type-name forest node in the new head 4 is ambiguous, which has two
+//     parses (type-name -> class-name -> id, type-name -> enum-name -> id).
 void GLRParser::performReduction(const Token &Lookahead) {
   if (!ReduceList.empty())
     LLVM_DEBUG(llvm::dbgs() << "  Performing **Reduce**\n");
 
-  // Reduce can manipulate the GSS in following way:
-  //
-  //  1) Split --
-  //     1.1 when a stack head has mutiple reduce actions, the head is
-  //     made to split to accommodate the various possiblities.
-  //     E.g.
-  //       0 -> 1 (ID)
-  //     After performing reduce of production rules (class-name := ID,
-  //     enum-name := ID), the GSS now has two new heads:
-  //       0 -> 2 (class-name)
-  //        `-> 3 (enum-name)
+  // Reductions per reduce path.
+  struct ReduceAction {
+    std::vector<const Graph::Node *> ReducePath;
+    RuleID ReduceRuleID;
+  };
+  auto OrderCmp = [this](const ReduceAction &L, const ReduceAction &R) {
+    auto LBegin = L.ReducePath.back()->Parsed->startLoc();
+    auto RBegin = R.ReducePath.back()->Parsed->startLoc();
+    if (LBegin == RBegin)
+      return G.table()
+                 .Nonterminals[G.lookupRule(L.ReduceRuleID).Target]
+                 .TopologicalOrder >
+             G.table()
+                 .Nonterminals[G.lookupRule(R.ReduceRuleID).Target]
+                 .TopologicalOrder;
+    return LBegin < RBegin;
+  };
+  auto Equal = [this](const ReduceAction &L, const ReduceAction &R) {
+    return L.ReducePath.back()->Parsed->startLoc() ==
+               R.ReducePath.back()->Parsed->startLoc() &&
+           G.lookupRule(L.ReduceRuleID).Target ==
+               G.lookupRule(R.ReduceRuleID).Target;
+  };
+
+  // Forest node is unmutable. To create an amgbiguous forest node, we need to
+  // know all alternatives in advance.
   //
-  //     1.2 when a stack head has a reduce action with multiple reduce
-  //     paths, the head is to split.
-  //     E.g.
-  //       ... -> 1(...) -> 3 (INT)
-  //                        ^
-  //       ... -> 2(...) ---|
+  // All Reductions must be performed in a careful order, so that we can gather
+  // all ambiguous alternatives as a batch, and process them as a single pass.
   //
-  //     After the reduce action (simple-type-specifier := INT), the GSS looks
-  //     like:
-  //       ... -> 1(...) -> 4 (simple-type-specifier)
-  //       ... -> 2(...) -> 5 (simple-type-specifier)
+  // Reductions is stored in a priority queue with a sorted order according to:
+  //   Rule 1: Reductions which span fewer tokens are processed first;
+  //   Rule 2: If two reductions A := X and B := Y span the same tokens,
+  //           A := X is processed first if topological order of nonterminal
+  //           A is less than nonterminal B (That is to say: if there is a
+  //           production rule B := A in the grammar, the reduction A := X
+  //           should come first because it will enable a new reduction B := A);
   //
-  //  2) Merge -- if multiple heads turn out to be identical after
-  //     reduction (new heads have the same state, and point to the same
-  //     predecessors), these heads are merged and treated as a single head.
-  //     This is usually where ambiguity happens.
+  // Each iteration, we construct a batch from the priority queue. Reductions in
+  // the batch span the same tokens and reduce to the same nonterminal.
   //
-  //     E.g.
-  //         0 -> 2 (class-name)
-  //         ` -> 3 (enum-name)
-  //     After reduction of rules (type-name := class-name | enum-name), the GSS
-  //     has the following form:
-  //         0 -> 4 (type-name)
-  //     The type-name forest node in the new head 4 is ambiguous, which has two
-  //     parses (type-name -> class-name -> id, type-name -> enum-name -> id).
+  // Local ambiguity packing (if present) is guaranteed in each batch.
+  std::priority_queue<ReduceAction, std::vector<ReduceAction>,
+                      decltype(OrderCmp)>
+      OrderedReduceList(OrderCmp);
+  auto addToOrderedReduceList = [&OrderedReduceList,
+                                 this](decltype(ReduceList) &ReduceList) {
+    std::vector<const Graph::Node *> ReducePath;
+    for (const auto &RA : ReduceList) {
+      RuleID ReduceRuleID = RA.PerformAction->getReduceRule();
+      const Rule &ReduceRule = G.lookupRule(ReduceRuleID);
+      enumerateReducePath(RA.Head, ReduceRule.Size, ReducePath, [&]() {
+        OrderedReduceList.push({ReducePath, ReduceRuleID});
+      });
+    }
+    ReduceList.clear();
+  };
+  addToOrderedReduceList(ReduceList);
 
-  // Store all newly-created stack heads for tracking ambiguities.
-  std::vector<const Graph::Node *> CreatedHeads;
-  while (!ReduceList.empty()) {
-    auto RA = std::move(ReduceList.back());
-    ReduceList.pop_back();
+  while (!OrderedReduceList.empty()) {
+    std::vector<ReduceAction> Batch;
+    do {
+      Batch.push_back(OrderedReduceList.top());
+      OrderedReduceList.pop();
+    } while (!OrderedReduceList.empty() &&
+             Equal(OrderedReduceList.top(), Batch.front()));
 
-    RuleID ReduceRuleID = RA.PerformAction->getReduceRule();
-    const Rule &ReduceRule = G.lookupRule(ReduceRuleID);
-    LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
-                   "    !reduce rule {0}: {1} head: {2}\n", ReduceRuleID,
-                   G.dumpRule(ReduceRuleID), RA.Head->State));
+    // newly-created GSS node -> corresponding forest node.
+    // If there are more thant 1 forest nodes, it means we hit ambiguities.
+    // Used to assemble the ambiguous forest node at the end.
+    llvm::DenseMap<Graph::Node *, std::vector<const ForestNode *>>
+        BuiltForestNodes;
+    // Track whether we hit ambiguities, determined by the equality of
+    // predecessors.
+    std::vector<Graph::Node *> CreatedGSSNodes;
+
+    for (const auto &RA : Batch) {
+      SymbolID ReduceSymbolID = G.lookupRule(RA.ReduceRuleID).Target;
+      // Create a corresponding sequence forest node for the reduce rule.
+      std::vector<const ForestNode *> ForestChildren;
+      for (const Graph::Node *PN : llvm::reverse(RA.ReducePath))
+        ForestChildren.push_back(PN->Parsed);
+      const ForestNode &ForestNode = ParsedForest.createSequence(
+          ReduceSymbolID, RA.ReduceRuleID, ForestChildren.front()->startLoc(),
+          ForestChildren);
 
-    std::vector<const Graph::Node *> ReducePath;
-    enumerateReducePath(RA.Head, ReduceRule.Size, ReducePath, [&]() {
-      LLVM_DEBUG(
-          llvm::dbgs() << llvm::formatv(
-              "     stack path: {0}, bases: {1}\n",
-              llvm::join(getStateString(ReducePath), " -> "),
-              llvm::join(getStateString(ReducePath.back()->predecessors()),
-                         ", ")));
-      assert(ReducePath.size() == ReduceRule.Size &&
-             "Reduce path's length must equal to the reduce rule size");
       // A reduce is a back-and-forth operation in the stack.
       // For example, we reduce a rule "declaration := decl-specifier-seq ;" on
       // the linear stack:
@@ -232,24 +296,26 @@
       //
       //   1. back -- pop |ReduceRuleLength| nodes (ReducePath) in the stack;
       //   2. forth -- push a new node in the stack and mark it as a head;
-      //     0 -> 4(declaration)
-      //          ^ Head
       //
-      // It becomes tricky if a reduce path has multiple bases, we want to merge
-      // them if their next state is the same. Similiar to above performShift,
-      // we partition the bases by their next state, and process each partition
-      // per loop iteration.
+      //   0 -> 4(declaration)
+      //        ^ Head
+      //
+      // Each RA corresponds to a single reduce path, but a reduce path can have
+      // multiple Bases, which could split the stack (depends on whether their
+      // next state is identical).
+      // Similiar to above `performShift`, we partition the Bases by their
+      // next state, and process each partition.
       struct BaseInfo {
         // An intermediate head after the stack has poped |ReducePath| nodes.
         const Graph::Node *Base = nullptr;
-        // The final state after reduce.
-        // It is getGoToState(Base->State, ReduceSymbol).
+        // The final state after reduction.
+        // The value is getGoToState(Base->State, ReduceSymbol).
         StateID NextState;
       };
       std::vector<BaseInfo> Bases;
-      for (const Graph::Node *Base : ReducePath.back()->predecessors())
+      for (const Graph::Node *Base : RA.ReducePath.back()->predecessors())
         Bases.push_back(
-            {Base, ParsingTable.getGoToState(Base->State, ReduceRule.Target)});
+            {Base, ParsingTable.getGoToState(Base->State, ReduceSymbolID)});
       llvm::sort(Bases, [](const BaseInfo &L, const BaseInfo &R) {
         return std::forward_as_tuple(L.NextState, L.Base) <
                std::forward_as_tuple(R.NextState, R.Base);
@@ -269,44 +335,64 @@
         assert(!Batch.empty());
         Partition = Partition.drop_front(Batch.size());
 
+        // Not needed, as it is created outside of the partition-loop.
+        // Create a corresponding sequence forest node for the reduce rule.
+        // std::vector<const ForestNode *> ForestChildren;
+        // for (const Graph::Node *PN : llvm::reverse(RA.ReducePath))
+        //   ForestChildren.push_back(PN->Parsed);
+        // const ForestNode &ForestNode = ParsedForest.createSequence(
+        //     ReduceSymbolID, RA.ReduceRuleID,
+        //     ForestChildren.front()->startLoc(), ForestChildren);
+        LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
+                       "     after reduce: {0} -> state {1} ({2})\n",
+                       llvm::join(getStateString(Predecessors), ", "),
+                       NextState, G.symbolName(ReduceSymbolID)));
+
         // Check ambiguities.
-        auto It = llvm::find_if(CreatedHeads, [&](const Graph::Node *Head) {
-          return Head->Parsed->symbol() == ReduceRule.Target &&
-                 Head->predecessors() == llvm::makeArrayRef(Predecessors);
-        });
-        if (It != CreatedHeads.end()) {
-          // This should be guaranteed by checking the equalivent of
-          // predecessors and reduce nonterminal symbol!
+        // FIXME: this is a linear scan, it might be too slow.
+        auto It =
+            llvm::find_if(CreatedGSSNodes, [&](const Graph::Node *Created) {
+              // Guaranteed by the side-effect of partition.
+              assert(llvm::is_sorted(Created->predecessors()) &&
+                     llvm::is_sorted(llvm::makeArrayRef(Predecessors)));
+              // Guaranteed by the Batch, where all reductions are reduced to
+              // a same nonterminal.
+              assert(Created->Parsed->symbol() == ReduceSymbolID);
+              return Created->predecessors() ==
+                     llvm::makeArrayRef(Predecessors);
+            });
+        if (It != CreatedGSSNodes.end()) {
+          // This is guaranteed by the equality of predecessors and target
+          // nonterminal of reduction rule!
           assert(NextState == (*It)->State);
           LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
                          "    found ambiguity, merged in state {0} (forest "
                          "'{1}')\n",
-                         (*It)->State, G.symbolName((*It)->Parsed->symbol())));
-          // FIXME: create ambiguous foreset node!
+                         NextState, G.symbolName((*It)->Parsed->symbol())));
+          BuiltForestNodes[*It].push_back(&ForestNode);
           continue;
         }
 
-        // Create a corresponding sequence forest node for the reduce rule.
-        std::vector<const ForestNode *> ForestChildren;
-        for (const Graph::Node *PN : llvm::reverse(ReducePath))
-          ForestChildren.push_back(PN->Parsed);
-        const ForestNode &ForestNode = ParsedForest.createSequence(
-            ReduceRule.Target, RA.PerformAction->getReduceRule(),
-            ForestChildren.front()->startLoc(), ForestChildren);
-        LLVM_DEBUG(llvm::dbgs() << llvm::formatv(
-                       "     after reduce: {0} -> state {1} ({2})\n",
-                       llvm::join(getStateString(Predecessors), ", "),
-                       NextState, G.symbolName(ReduceRule.Target)));
-
-        // Create a new stack head.
-        const Graph::Node *Head =
-            GSS.addNode(NextState, &ForestNode, Predecessors);
-        CreatedHeads.push_back(Head);
+        // Create a new GSS node.
+        Graph::Node *Head = GSS.addNode(NextState, &ForestNode, Predecessors);
+        CreatedGSSNodes.push_back(Head);
+        BuiltForestNodes[Head].push_back(&ForestNode);
 
-        // Actions that are enabled by this reduce.
+        // Actions that are enabled by this reduction.
         addActions(Head, Lookahead);
       }
-    });
+    }
+    // We're good to assmeble the ambiguous forest node if any.
+    for (auto It : BuiltForestNodes) {
+      if (It.second.size() > 1) {
+        It.first->Parsed = &ParsedForest.createAmbiguous(
+            It.second.front()->symbol(), It.getSecond());
+        continue;
+      }
+      assert(It.first->Parsed == It.getSecond().front());
+    }
+    // OK, now add newly-enabled reductions to the ordered list;
+    addToOrderedReduceList(ReduceList);
   }
 }
 
diff --git a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
--- a/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
+++ b/clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp
@@ -100,11 +100,52 @@
         ++RulePos;
       T->Nonterminals[SID].RuleRange = {Start, RulePos};
     }
+    calculateDependencyOrder(T.get());
     auto G = std::make_unique<Grammar>(std::move(T));
     diagnoseGrammar(*G);
     return G;
   }
 
+  void calculateDependencyOrder(GrammarTable *T) const {
+    llvm::DenseMap<SymbolID, llvm::DenseSet<SymbolID>> DependencyGraph;
+    for (const auto &Rule : T->Rules) {
+      // A := B, A depends on B.
+      if (Rule.Size == 1 && pseudo::isNonterminal(Rule.Sequence[0]))
+        DependencyGraph[Rule.Target].insert(Rule.Sequence[0]);
+    }
+    std::vector<SymbolID> Order;
+    // Each nonterminal state flows: NotVisited -> Visiting -> Visited.
+    enum State {
+      NotVisited,
+      Visiting,
+      Visited,
+    };
+    std::vector<SymbolID> VisitStates(T->Nonterminals.size(), NotVisited);
+    std::function<void(SymbolID)> DFS = [&](SymbolID SID) -> void {
+      if (VisitStates[SID] == Visited)
+        return;
+      if (VisitStates[SID] == Visiting) {
+        Diagnostics.push_back(
+            llvm::formatv("The grammar is cyclic, see symbol {0}\n",
+                          T->Nonterminals[SID].Name));
+        return;
+      }
+      VisitStates[SID] = Visiting;
+      auto It = DependencyGraph.find(SID);
+      if (It != DependencyGraph.end()) {
+        for (SymbolID Dep : (It->getSecond()))
+          DFS(Dep);
+      }
+      VisitStates[SID] = Visited;
+      Order.push_back(SID);
+    };
+    for (SymbolID ID = 0; ID != T->Nonterminals.size(); ++ID)
+      DFS(ID);
+    for (size_t I = 0; I < Order.size(); ++I) {
+      T->Nonterminals[Order[I]].TopologicalOrder = I;
+    }
+  }
+
 private:
   // Text representation of a BNF grammar rule.
   struct RuleSpec {
diff --git a/clang/tools/clang-pseudo/ClangPseudo.cpp b/clang/tools/clang-pseudo/ClangPseudo.cpp
--- a/clang/tools/clang-pseudo/ClangPseudo.cpp
+++ b/clang/tools/clang-pseudo/ClangPseudo.cpp
@@ -89,7 +89,7 @@
                      << " nodes: " << Arena.nodeNum() << "\n";
         llvm::outs() << "GSS bytes: " << Parser.getGSS().bytes()
                      << " nodes: " << Parser.getGSS().nodeCount() << "\n";
-        // llvm::outs() << Root->DumpRecursive(*G, true);
+        llvm::outs() << Root->DumpRecursive(*G, false);
       }
     }
     return 0;