diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -33,6 +33,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/FunctionExtras.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerUnion.h" @@ -48,8 +49,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -114,6 +115,21 @@ MU_Undef = 2 }; +/// Captures some information about where the tokens come from. Used by the +/// callback that records tokens. +enum class TokenSource { + File, // a token coming directly from a file that is not a macro directive, + // a macro name, etc. + MacroNameOrArg, // a name or an argument of a macro expansion. + MacroExpansion, // a token coming from a body of a macro expansion. + MacroDirective, // a token from a macro directive body, i.e. '#' and all the + // tokens till the end of the line. + SkippedPPBranch, // a token from a disabled #if or #ifdef branch. + Precached, // a token from a previously saved token stream. + AfterModuleImport, // FIXME: look into this case more closely, describe what + // it is. +}; + /// Engages in a tight little dance with the lexer to efficiently /// preprocess tokens. /// @@ -124,6 +140,7 @@ friend class VAOptDefinitionContext; friend class VariadicMacroScopeGuard; + llvm::unique_function OnToken; std::shared_ptr PPOpts; DiagnosticsEngine *Diags; LangOptions &LangOpts; @@ -911,6 +928,15 @@ } /// \} + /// Register a function that would be called on each token seen by the + /// preprocessor. This is a very low-level hook, the produced token stream is + /// tied to the internals of the preprocessor so interpreting result of the + /// callback is hard. 
+ void setTokenWatcher( + llvm::unique_function F) { + OnToken = std::move(F); + } + bool isMacroDefined(StringRef Id) { return isMacroDefined(&Identifiers.get(Id)); } diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -147,6 +147,10 @@ /// preprocessor directive. bool isParsingPreprocessorDirective() const; + /// Returns true iff the TokenLexer is expanding a macro and not replaying a + /// stream of tokens. + bool isMacroExpansion() const { return Macro != nullptr; } + private: void destroy(); diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -400,6 +400,9 @@ setCodeCompletionReached(); continue; } + // This token is not reported to the token watcher by Lex(); report it here. + if (OnToken) + OnToken(Tok, TokenSource::SkippedPPBranch); // If this is the end of the buffer, we have an error. if (Tok.is(tok::eof)) { @@ -865,6 +868,10 @@ // Save the '#' token in case we need to return it later. Token SavedHash = Result; + // Lex() never sees the '#' token from directives, so report it here. + if (OnToken) + OnToken(Result, TokenSource::MacroDirective); + // Read the next token, the directive flavor. This isn't expanded due to // C99 6.10.3p8. LexUnexpandedToken(Result); diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -463,6 +463,10 @@ const MacroDefinition &M) { MacroInfo *MI = M.getMacroInfo(); + // The macro-expanded identifiers are not seen by the Lex() method. + if (OnToken) + OnToken(Identifier, TokenSource::MacroNameOrArg); + // If this is a macro expansion in the "#if !defined(x)" line for the file, // then the macro could expand to different things in other contexts, we need // to disable the optimization in this case. 
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -864,20 +864,32 @@ void Preprocessor::Lex(Token &Result) { // We loop here until a lex function returns a token; this avoids recursion. bool ReturnedToken; + TokenSource Source; // Assigned by every case of the switch below. do { switch (CurLexerKind) { case CLK_Lexer: + if (CurLexer->ParsingPreprocessorDirective) + Source = TokenSource::MacroDirective; + else if (DisableMacroExpansion) + Source = TokenSource::MacroNameOrArg; + else + Source = TokenSource::File; + ReturnedToken = CurLexer->Lex(Result); break; case CLK_TokenLexer: + Source = CurTokenLexer->isMacroExpansion() ? TokenSource::MacroExpansion + : TokenSource::Precached; ReturnedToken = CurTokenLexer->Lex(Result); break; case CLK_CachingLexer: CachingLex(Result); + Source = TokenSource::Precached; ReturnedToken = true; break; case CLK_LexAfterModuleImport: LexAfterModuleImport(Result); + Source = TokenSource::AfterModuleImport; ReturnedToken = true; break; } @@ -893,6 +905,8 @@ } LastTokenWasAt = Result.is(tok::at); + if (OnToken) + OnToken(Result, Source); } /// Lex a header-name token (including one formed from header-name-tokens if