diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -33,6 +33,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerUnion.h"
@@ -48,8 +49,8 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <memory>
 #include <map>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -124,6 +125,7 @@
   friend class VAOptDefinitionContext;
   friend class VariadicMacroScopeGuard;
 
+  llvm::unique_function<void(const clang::Token &)> OnToken;
   std::shared_ptr<PreprocessorOptions> PPOpts;
   DiagnosticsEngine        *Diags;
   LangOptions       &LangOpts;
@@ -997,6 +999,12 @@
   }
   /// \}
 
+  /// Register a function that would be called on each token in the final
+  /// expanded token stream.
+  void setTokenWatcher(llvm::unique_function<void(const clang::Token &)> F) {
+    OnToken = std::move(F);
+  }
+
   bool isMacroDefined(StringRef Id) {
     return isMacroDefined(&Identifiers.get(Id));
   }
@@ -2135,7 +2143,7 @@
 
   //===--------------------------------------------------------------------===//
   // Caching stuff.
-  void CachingLex(Token &Result, bool &IsNewToken);
+  void CachingLex(Token &Result);
 
   bool InCachingLexMode() const {
     // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means
diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h
--- a/clang/include/clang/Lex/Token.h
+++ b/clang/include/clang/Lex/Token.h
@@ -70,20 +70,22 @@
 public:
   // Various flags set per token:
   enum TokenFlags {
-    StartOfLine   = 0x01,  // At start of line or only after whitespace
-                           // (considering the line after macro expansion).
-    LeadingSpace  = 0x02,  // Whitespace exists before this token (considering
-                           // whitespace after macro expansion).
-    DisableExpand = 0x04,  // This identifier may never be macro expanded.
-    NeedsCleaning = 0x08,  // Contained an escaped newline or trigraph.
+    StartOfLine = 0x01,   // At start of line or only after whitespace
+                          // (considering the line after macro expansion).
+    LeadingSpace = 0x02,  // Whitespace exists before this token (considering
+                          // whitespace after macro expansion).
+    DisableExpand = 0x04, // This identifier may never be macro expanded.
+    NeedsCleaning = 0x08, // Contained an escaped newline or trigraph.
     LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
-    HasUDSuffix = 0x20,    // This string or character literal has a ud-suffix.
-    HasUCN = 0x40,         // This identifier contains a UCN.
-    IgnoredComma = 0x80,   // This comma is not a macro argument separator (MS).
+    HasUDSuffix = 0x20,  // This string or character literal has a ud-suffix.
+    HasUCN = 0x40,       // This identifier contains a UCN.
+    IgnoredComma = 0x80, // This comma is not a macro argument separator (MS).
     StringifiedInMacro = 0x100, // This string or character literal is formed by
                                 // macro stringizing or charizing operator.
     CommaAfterElided = 0x200, // The comma following this token was elided (MS).
     IsEditorPlaceholder = 0x400, // This identifier is a placeholder.
+    IsNewToken = 0x800, // This tokens has not been seen before, i.e. not from a
+                        // cached token stream.
   };
 
   tok::TokenKind getKind() const { return Kind; }
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -147,6 +147,10 @@
   /// preprocessor directive.
   bool isParsingPreprocessorDirective() const;
 
+  /// Returns true iff the TokenLexer is expanding a macro and not replaying a
+  /// stream of tokens.
+  bool isMacroExpansion() const { return Macro != nullptr; }
+
 private:
   void destroy();
 
diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp
--- a/clang/lib/Lex/PPCaching.cpp
+++ b/clang/lib/Lex/PPCaching.cpp
@@ -45,7 +45,7 @@
   recomputeCurLexerKind();
 }
 
-void Preprocessor::CachingLex(Token &Result, bool &IsNewToken) {
+void Preprocessor::CachingLex(Token &Result) {
   if (!InCachingLexMode())
     return;
 
@@ -55,7 +55,8 @@
 
   if (CachedLexPos < CachedTokens.size()) {
     Result = CachedTokens[CachedLexPos++];
-    IsNewToken = false;
+    // FIXME: do this flag when writing to CachedTokens.
+    Result.clearFlag(Token::IsNewToken);
     return;
   }
 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -635,8 +635,7 @@
       CurTokenLexer->Lex(Tok);
       break;
     case CLK_CachingLexer:
-      bool IsNewToken;
-      CachingLex(Tok, IsNewToken);
+      CachingLex(Tok);
       break;
     case CLK_LexAfterModuleImport:
       LexAfterModuleImport(Tok);
@@ -880,22 +879,27 @@
   ++LexLevel;
 
   // We loop here until a lex function returns a token; this avoids recursion.
+  // FIXME: IsNewToken should be set inside lexers.
   bool ReturnedToken;
-  bool IsNewToken = true;
   do {
     switch (CurLexerKind) {
     case CLK_Lexer:
       ReturnedToken = CurLexer->Lex(Result);
+      Result.setFlag(Token::IsNewToken);
       break;
-    case CLK_TokenLexer:
+    case CLK_TokenLexer: {
+      bool IsNewToken = CurTokenLexer->isMacroExpansion();
       ReturnedToken = CurTokenLexer->Lex(Result);
+      Result.setFlagValue(Token::IsNewToken, IsNewToken);
       break;
+    }
     case CLK_CachingLexer:
-      CachingLex(Result, IsNewToken);
+      CachingLex(Result);
       ReturnedToken = true;
       break;
     case CLK_LexAfterModuleImport:
       ReturnedToken = LexAfterModuleImport(Result);
+      Result.setFlag(Token::IsNewToken);
       break;
     }
   } while (!ReturnedToken);
@@ -911,7 +915,8 @@
 
   // Update ImportSeqState to track our position within a C++20 import-seq
   // if this token is being produced as a result of phase 4 of translation.
-  if (getLangOpts().CPlusPlusModules && LexLevel == 1 && IsNewToken) {
+  if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
+      Result.getFlag(Token::IsNewToken)) {
     switch (Result.getKind()) {
     case tok::l_paren: case tok::l_square: case tok::l_brace:
       ImportSeqState.handleOpenBracket();
@@ -952,6 +957,8 @@
 
   LastTokenWasAt = Result.is(tok::at);
   --LexLevel;
+  if (OnToken && LexLevel == 0 && Result.getFlag(Token::IsNewToken))
+    OnToken(Result);
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if