diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -33,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerUnion.h" @@ -48,8 +49,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -124,6 +125,7 @@ friend class VAOptDefinitionContext; friend class VariadicMacroScopeGuard; + llvm::unique_function OnToken; std::shared_ptr PPOpts; DiagnosticsEngine *Diags; LangOptions &LangOpts; @@ -997,6 +999,12 @@ } /// \} + /// Register a function that will be called on each token in the final + /// expanded token stream. Replaces any previously registered watcher. + void setTokenWatcher(llvm::unique_function F) { + OnToken = std::move(F); + } + bool isMacroDefined(StringRef Id) { return isMacroDefined(&Identifiers.get(Id)); } @@ -2135,7 +2143,7 @@ //===--------------------------------------------------------------------===// // Caching stuff. - void CachingLex(Token &Result, bool &IsNewToken); + void CachingLex(Token &Result); bool InCachingLexMode() const { // If the Lexer pointers are 0 and IncludeMacroStack is empty, it means diff --git a/clang/include/clang/Lex/Token.h b/clang/include/clang/Lex/Token.h --- a/clang/include/clang/Lex/Token.h +++ b/clang/include/clang/Lex/Token.h @@ -70,20 +70,22 @@ public: // Various flags set per token: enum TokenFlags { - StartOfLine = 0x01, // At start of line or only after whitespace - // (considering the line after macro expansion). - LeadingSpace = 0x02, // Whitespace exists before this token (considering - // whitespace after macro expansion). - DisableExpand = 0x04, // This identifier may never be macro expanded. - NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. 
+ StartOfLine = 0x01, // At start of line or only after whitespace + // (considering the line after macro expansion). + LeadingSpace = 0x02, // Whitespace exists before this token (considering + // whitespace after macro expansion). + DisableExpand = 0x04, // This identifier may never be macro expanded. + NeedsCleaning = 0x08, // Contained an escaped newline or trigraph. LeadingEmptyMacro = 0x10, // Empty macro exists before this token. - HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. - HasUCN = 0x40, // This identifier contains a UCN. - IgnoredComma = 0x80, // This comma is not a macro argument separator (MS). + HasUDSuffix = 0x20, // This string or character literal has a ud-suffix. + HasUCN = 0x40, // This identifier contains a UCN. + IgnoredComma = 0x80, // This comma is not a macro argument separator (MS). StringifiedInMacro = 0x100, // This string or character literal is formed by // macro stringizing or charizing operator. CommaAfterElided = 0x200, // The comma following this token was elided (MS). IsEditorPlaceholder = 0x400, // This identifier is a placeholder. + IsNewToken = 0x800, // This token has not been seen before, i.e. not from a + // cached token stream. }; tok::TokenKind getKind() const { return Kind; } diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -147,6 +147,10 @@ /// preprocessor directive. bool isParsingPreprocessorDirective() const; + /// Returns true iff the TokenLexer is expanding a macro and not replaying a + /// stream of tokens. 
+ bool isMacroExpansion() const { return Macro != nullptr; } + private: void destroy(); diff --git a/clang/lib/Lex/PPCaching.cpp b/clang/lib/Lex/PPCaching.cpp --- a/clang/lib/Lex/PPCaching.cpp +++ b/clang/lib/Lex/PPCaching.cpp @@ -45,7 +45,7 @@ recomputeCurLexerKind(); } -void Preprocessor::CachingLex(Token &Result, bool &IsNewToken) { +void Preprocessor::CachingLex(Token &Result) { if (!InCachingLexMode()) return; @@ -55,7 +55,8 @@ if (CachedLexPos < CachedTokens.size()) { Result = CachedTokens[CachedLexPos++]; - IsNewToken = false; + // FIXME: clear this flag when writing to CachedTokens. + Result.clearFlag(Token::IsNewToken); return; } diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -635,8 +635,7 @@ CurTokenLexer->Lex(Tok); break; case CLK_CachingLexer: - bool IsNewToken; - CachingLex(Tok, IsNewToken); + CachingLex(Tok); break; case CLK_LexAfterModuleImport: LexAfterModuleImport(Tok); @@ -880,22 +879,27 @@ ++LexLevel; // We loop here until a lex function returns a token; this avoids recursion. + // FIXME: IsNewToken should be set inside lexers. bool ReturnedToken; - bool IsNewToken = true; do { switch (CurLexerKind) { case CLK_Lexer: ReturnedToken = CurLexer->Lex(Result); + Result.setFlag(Token::IsNewToken); break; - case CLK_TokenLexer: + case CLK_TokenLexer: { + bool IsNewToken = CurTokenLexer->isMacroExpansion(); ReturnedToken = CurTokenLexer->Lex(Result); + Result.setFlagValue(Token::IsNewToken, IsNewToken); break; + } case CLK_CachingLexer: - CachingLex(Result, IsNewToken); + CachingLex(Result); ReturnedToken = true; break; case CLK_LexAfterModuleImport: ReturnedToken = LexAfterModuleImport(Result); + Result.setFlag(Token::IsNewToken); break; } } while (!ReturnedToken); @@ -911,7 +915,8 @@ // Update ImportSeqState to track our position within a C++20 import-seq // if this token is being produced as a result of phase 4 of translation. 
- if (getLangOpts().CPlusPlusModules && LexLevel == 1 && IsNewToken) { + if (getLangOpts().CPlusPlusModules && LexLevel == 1 && + Result.getFlag(Token::IsNewToken)) { switch (Result.getKind()) { case tok::l_paren: case tok::l_square: case tok::l_brace: ImportSeqState.handleOpenBracket(); @@ -952,6 +957,8 @@ LastTokenWasAt = Result.is(tok::at); --LexLevel; + if (OnToken && LexLevel == 0 && Result.getFlag(Token::IsNewToken)) + OnToken(Result); } /// Lex a header-name token (including one formed from header-name-tokens if