diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -33,6 +33,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/FunctionExtras.h"
 #include "llvm/ADT/None.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerUnion.h"
@@ -48,8 +49,8 @@
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
-#include <memory>
 #include <map>
+#include <memory>
 #include <string>
 #include <utility>
 #include <vector>
@@ -114,6 +115,23 @@
   MU_Undef = 2
 };
 
+/// Describes where a token reported to the token watcher came from. Every flag
+/// defaults to false; the preprocessor sets the ones that apply to the token.
+struct TokenSource {
+  /// The token is the name of a macro in a macro expansion.
+  bool IsMacroName = false;
+  /// The token is an argument of a macro expansion.
+  bool IsMacroArg = false;
+  /// The token is part of a macro argument pre-expansion.
+  bool InMacroArgPreExpansion = false;
+  /// The token is inside a preprocessor (PP) directive.
+  bool InDirective = false;
+  /// The token is part of a skipped PP branch, i.e. a disabled #if or #elif.
+  bool InSkippedPPBranch = false;
+  /// The token comes from a pre-cached token stream.
+  bool IsCached = false;
+};
+
 /// Engages in a tight little dance with the lexer to efficiently
 /// preprocess tokens.
 ///
@@ -124,6 +142,7 @@
   friend class VAOptDefinitionContext;
   friend class VariadicMacroScopeGuard;
 
+  llvm::unique_function<void(const Token &, const TokenSource &)> OnToken;
   std::shared_ptr<PreprocessorOptions> PPOpts;
   DiagnosticsEngine *Diags;
   LangOptions &LangOpts;
@@ -997,6 +1016,15 @@
   }
   /// \}
 
+  /// Register a function that is called on each token seen by the
+  /// preprocessor, together with a TokenSource describing its origin. This is
+  /// a very low-level hook: the produced token stream is tied to preprocessor
+  /// internals, so interpreting the result of the callback is hard.
+  void setTokenWatcher(
+      llvm::unique_function<void(const Token &, const TokenSource &)> F) {
+    OnToken = std::move(F);
+  }
+
   bool isMacroDefined(StringRef Id) {
     return isMacroDefined(&Identifiers.get(Id));
   }
diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h
--- a/clang/include/clang/Lex/TokenLexer.h
+++ b/clang/include/clang/Lex/TokenLexer.h
@@ -147,6 +147,10 @@
   /// preprocessor directive.
   bool isParsingPreprocessorDirective() const;
 
+  /// Returns true iff the TokenLexer is expanding a macro and not replaying a
+  /// stream of tokens.
+  bool isMacroExpansion() const { return Macro != nullptr; }
+
 private:
   void destroy();
 
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -404,6 +404,12 @@
       setCodeCompletionReached();
       continue;
     }
+    // Presumably Lex() never sees skipped tokens (confirm); report them here.
+    if (OnToken) {
+      TokenSource S;
+      S.InSkippedPPBranch = true;
+      OnToken(Tok, S);
+    }
 
     // If this is the end of the buffer, we have an error.
     if (Tok.is(tok::eof)) {
@@ -883,6 +889,13 @@
   // Save the '#' token in case we need to return it later.
   Token SavedHash = Result;
 
+  // Lex() never sees the '#' token from directives, so report it here.
+  if (OnToken) {
+    TokenSource S;
+    S.InDirective = true;
+    OnToken(Result, S);
+  }
+
   // Read the next token, the directive flavor. This isn't expanded due to
   // C99 6.10.3p8.
   LexUnexpandedToken(Result);
diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp
--- a/clang/lib/Lex/PPMacroExpansion.cpp
+++ b/clang/lib/Lex/PPMacroExpansion.cpp
@@ -463,6 +463,15 @@
                                                  const MacroDefinition &M) {
   MacroInfo *MI = M.getMacroInfo();
 
+  // The macro-expanded identifiers are not seen by the Lex() method.
+  if (OnToken) {
+    TokenSource S;
+    S.InDirective = CurLexer && CurLexer->ParsingPreprocessorDirective;
+    S.InMacroArgPreExpansion = InMacroArgPreExpansion;
+    S.IsMacroName = true;
+    OnToken(Identifier, S);
+  }
+
   // If this is a macro expansion in the "#if !defined(x)" line for the file,
   // then the macro could expand to different things in other contexts, we need
   // to disable the optimization in this case.
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -867,20 +867,37 @@
   // We loop here until a lex function returns a token; this avoids recursion.
   bool ReturnedToken;
   bool IsNewToken = true;
+  // Origin of the token being lexed; handed to the OnToken watcher, if any.
+  TokenSource Source;
   do {
+    // Start each attempt from a default-constructed description.
+    Source = TokenSource();
+
     switch (CurLexerKind) {
     case CLK_Lexer:
+      Source.InDirective = CurLexer->ParsingPreprocessorDirective;
+      Source.IsMacroArg = InMacroArgs;
+      Source.InMacroArgPreExpansion = InMacroArgPreExpansion;
+
       ReturnedToken = CurLexer->Lex(Result);
       break;
     case CLK_TokenLexer:
+      Source.IsMacroArg = InMacroArgs;
+      Source.InMacroArgPreExpansion = InMacroArgPreExpansion;
+      Source.IsCached = !CurTokenLexer->isMacroExpansion();
+
       ReturnedToken = CurTokenLexer->Lex(Result);
       break;
     case CLK_CachingLexer:
+      Source.IsCached = true;
+
       CachingLex(Result, IsNewToken);
       ReturnedToken = true;
       break;
     case CLK_LexAfterModuleImport:
+      Source.InDirective = true;
+
       ReturnedToken = LexAfterModuleImport(Result);
       break;
     }
   } while (!ReturnedToken);
@@ -937,6 +954,8 @@
 
   LastTokenWasAt = Result.is(tok::at);
   --LexLevel;
+  if (OnToken)
+    OnToken(Result, Source);
 }
 
 /// Lex a header-name token (including one formed from header-name-tokens if