diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h
--- a/clang/include/clang/Lex/Lexer.h
+++ b/clang/include/clang/Lex/Lexer.h
@@ -16,6 +16,7 @@
 #include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Basic/TokenKinds.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "clang/Lex/PreprocessorLexer.h"
 #include "clang/Lex/Token.h"
 #include "llvm/ADT/Optional.h"
@@ -149,6 +150,13 @@
   // CurrentConflictMarkerState - The kind of conflict marker we are handling.
   ConflictMarkerKind CurrentConflictMarkerState;
 
+  /// Non-empty if this \p Lexer is \p isDependencyDirectivesLexer().
+  ArrayRef<dependency_directives_scan::Directive> DepDirectives;
+
+  /// If this \p Lexer is \p isDependencyDirectivesLexer(), it represents the
+  /// next token to use from the current dependency directive.
+  unsigned NextDepDirectiveTokenIndex = 0;
+
   void InitLexer(const char *BufStart, const char *BufPtr, const char *BufEnd);
 
 public:
@@ -195,6 +203,23 @@
   /// return the tok::eof token. This implicitly involves the preprocessor.
   bool Lex(Token &Result);
 
+  /// Called when the preprocessor is in 'dependency scanning lexing mode'.
+  bool LexDependencyDirectiveToken(Token &Result);
+
+  /// Called when the preprocessor is in 'dependency scanning lexing mode' and
+  /// is skipping a conditional block.
+  bool LexDependencyDirectiveTokenWhileSkipping(Token &Result);
+
+  /// True when the preprocessor is in 'dependency scanning lexing mode' and
+  /// created this \p Lexer for lexing a set of dependency directive tokens.
+  bool isDependencyDirectivesLexer() const { return !DepDirectives.empty(); }
+
+  /// Initializes \p Result with data from \p DDTok and advances \p BufferPtr to
+  /// the position just after the token.
+  /// \returns the buffer pointer at the beginning of the token.
+  const char *convertDependencyDirectiveToken(
+      const dependency_directives_scan::Token &DDTok, Token &Result);
+
 public:
   /// isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
   bool isPragmaLexer() const { return Is_PragmaLexer; }
diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h
--- a/clang/include/clang/Lex/Preprocessor.h
+++ b/clang/include/clang/Lex/Preprocessor.h
@@ -557,6 +557,7 @@
     CLK_Lexer,
     CLK_TokenLexer,
     CLK_CachingLexer,
+    CLK_DependencyDirectivesLexer,
     CLK_LexAfterModuleImport
   } CurLexerKind = CLK_Lexer;
 
diff --git a/clang/include/clang/Lex/PreprocessorOptions.h b/clang/include/clang/Lex/PreprocessorOptions.h
--- a/clang/include/clang/Lex/PreprocessorOptions.h
+++ b/clang/include/clang/Lex/PreprocessorOptions.h
@@ -10,7 +10,9 @@
 #define LLVM_CLANG_LEX_PREPROCESSOROPTIONS_H_
 
 #include "clang/Basic/BitmaskEnum.h"
+#include "clang/Basic/FileEntry.h"
 #include "clang/Basic/LLVM.h"
+#include "clang/Lex/DependencyDirectivesScanner.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSet.h"
 #include <functional>
@@ -199,6 +201,19 @@
   /// build it again.
   std::shared_ptr<FailedModulesSet> FailedModules;
 
+  /// Function for getting the dependency preprocessor directives of a file.
+  ///
+  /// These are directives derived from a special form of lexing where the
+  /// source input is scanned for the preprocessor directives that might have an
+  /// effect on the dependencies for a compilation unit.
+  ///
+  /// Enables a client to cache the directives for a file and provide them
+  /// across multiple compiler invocations.
+  /// FIXME: Allow returning an error.
+  std::function<Optional<ArrayRef<dependency_directives_scan::Directive>>(
+      FileEntryRef)>
+      DependencyDirectivesForFile;
+
   /// Set up preprocessor for RunAnalysis action.
   bool SetUpStaticAnalyzer = false;
 
diff --git a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
--- a/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
+++ b/clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h
@@ -294,10 +294,6 @@
   /// Enable directives scanning of all files.
   void enableDirectivesScanningOfAllFiles() { NotToBeScanned.clear(); }
 
-private:
-  /// Check whether the file should be scanned for preprocessor directives.
-  bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID);
-
   /// Returns entry for the given filename.
   ///
   /// Attempts to use the local and shared caches first, then falls back to
@@ -306,6 +302,10 @@
   getOrCreateFileSystemEntry(StringRef Filename,
                              bool DisableDirectivesScanning = false);
 
+private:
+  /// Check whether the file should be scanned for preprocessor directives.
+  bool shouldScanForDirectives(StringRef Filename, llvm::sys::fs::UniqueID UID);
+
   /// For a filename that's not yet associated with any entry in the caches,
   /// uses the underlying filesystem to either look up the entry based in the
   /// shared cache indexed by unique ID, or creates new entry from scratch.
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2937,6 +2937,13 @@
 unsigned Lexer::isNextPPTokenLParen() {
   assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?");
 
+  if (isDependencyDirectivesLexer()) {
+    if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size())
+      return 2;
+    return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
+        tok::l_paren);
+  }
+
   // Switch to 'skipping' mode. This will ensure that we can lex a token
   // without emitting diagnostics, disables macro expansion, and will cause EOF
   // to return an EOF token instead of popping the include stack.
@@ -3279,6 +3286,8 @@
 }
 
 bool Lexer::Lex(Token &Result) {
+  assert(!isDependencyDirectivesLexer());
+
   // Start a new token.
   Result.startToken();
 
@@ -4100,3 +4109,145 @@
   // We parsed the directive; lex a token with the new state.
   return false;
 }
+
+const char *Lexer::convertDependencyDirectiveToken(
+    const dependency_directives_scan::Token &DDTok, Token &Result) {
+  // The scanner token records its position as an offset from the start of
+  // this lexer's buffer.
+  const char *TokPtr = BufferStart + DDTok.Offset;
+  Result.startToken();
+  Result.setLocation(getSourceLocation(TokPtr));
+  Result.setKind(DDTok.Kind);
+  Result.setFlag((Token::TokenFlags)DDTok.Flags);
+  Result.setLength(DDTok.Length);
+  BufferPtr = TokPtr + DDTok.Length;
+  return TokPtr;
+}
+
+bool Lexer::LexDependencyDirectiveToken(Token &Result) {
+  assert(isDependencyDirectivesLexer());
+
+  using namespace dependency_directives_scan;
+
+  // Advance past directives whose tokens have all been consumed. This relies
+  // on the directive list ending with a 'pp_eof' directive.
+  while (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) {
+    if (DepDirectives.front().Kind == pp_eof)
+      return LexEndOfFile(Result, BufferEnd);
+    NextDepDirectiveTokenIndex = 0;
+    DepDirectives = DepDirectives.drop_front();
+  }
+
+  const dependency_directives_scan::Token &DDTok =
+      DepDirectives.front().Tokens[NextDepDirectiveTokenIndex++];
+
+  const char *TokPtr = convertDependencyDirectiveToken(DDTok, Result);
+
+  // A '#' at the start of a line begins a directive: let the preprocessor
+  // handle it and report that no token was produced by this call.
+  if (Result.is(tok::hash) && Result.isAtStartOfLine()) {
+    PP->HandleDirective(Result);
+    return false;
+  }
+  if (Result.is(tok::raw_identifier)) {
+    Result.setRawIdentifierData(TokPtr);
+    if (!isLexingRawMode()) {
+      IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
+      if (II->isHandleIdentifierCase())
+        return PP->HandleIdentifier(Result);
+    }
+    return true;
+  }
+  if (Result.isLiteral()) {
+    Result.setLiteralData(TokPtr);
+    return true;
+  }
+  if (Result.is(tok::colon) &&
+      (LangOpts.CPlusPlus || LangOpts.DoubleSquareBracketAttributes)) {
+    // Convert consecutive colons to 'tok::coloncolon'.
+    if (*BufferPtr == ':') {
+      assert(DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is(
+          tok::colon));
+      ++NextDepDirectiveTokenIndex;
+      Result.setKind(tok::coloncolon);
+    }
+    return true;
+  }
+  if (Result.is(tok::eod))
+    ParsingPreprocessorDirective = false;
+
+  return true;
+}
+
+bool Lexer::LexDependencyDirectiveTokenWhileSkipping(Token &Result) {
+  assert(isDependencyDirectivesLexer());
+
+  using namespace dependency_directives_scan;
+
+  bool Stop = false;
+  unsigned NestedIfs = 0;
+  // Walk the directives that follow the current one, tracking nested
+  // conditionals, until reaching the directive that ends the block being
+  // skipped (or the end of the file).
+  do {
+    DepDirectives = DepDirectives.drop_front();
+    switch (DepDirectives.front().Kind) {
+    case pp_none:
+      llvm_unreachable("unexpected 'pp_none'");
+    case pp_include:
+    case pp___include_macros:
+    case pp_define:
+    case pp_undef:
+    case pp_import:
+    case pp_pragma_import:
+    case pp_pragma_once:
+    case pp_pragma_push_macro:
+    case pp_pragma_pop_macro:
+    case pp_pragma_include_alias:
+    case pp_include_next:
+    case decl_at_import:
+    case cxx_module_decl:
+    case cxx_import_decl:
+    case cxx_export_module_decl:
+    case cxx_export_import_decl:
+      break;
+    case pp_if:
+    case pp_ifdef:
+    case pp_ifndef:
+      ++NestedIfs;
+      break;
+    case pp_elif:
+    case pp_elifdef:
+    case pp_elifndef:
+    case pp_else:
+      if (!NestedIfs) {
+        Stop = true;
+      }
+      break;
+    case pp_endif:
+      if (!NestedIfs) {
+        Stop = true;
+      } else {
+        --NestedIfs;
+      }
+      break;
+    case pp_eof:
+      // Reset the index before end-of-file processing: it still holds the
+      // value it had on entry. If this lexer is asked for another token,
+      // LexDependencyDirectiveToken() would compare that stale index against
+      // the token count of the 'pp_eof' directive and could index past its end.
+      NextDepDirectiveTokenIndex = 0;
+      return LexEndOfFile(Result, BufferEnd);
+    }
+  } while (!Stop);
+
+  // Hand the '#' of the stopping directive back to the caller; the remaining
+  // tokens of that directive are lexed starting from index 1.
+  const dependency_directives_scan::Token &DDTok =
+      DepDirectives.front().Tokens.front();
+  assert(DDTok.is(tok::hash));
+  NextDepDirectiveTokenIndex = 1;
+
+  convertDependencyDirectiveToken(DDTok, Result);
+  return false;
+}
diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp
--- a/clang/lib/Lex/PPDirectives.cpp
+++ b/clang/lib/Lex/PPDirectives.cpp
@@ -426,29 +426,40 @@
   Token Tok;
   SourceLocation endLoc;
   while (true) {
-    CurLexer->Lex(Tok);
+    if (CurLexer->isDependencyDirectivesLexer()) {
+      CurLexer->LexDependencyDirectiveTokenWhileSkipping(Tok);
+    } else {
+      while (true) {
+        CurLexer->Lex(Tok);
 
-    if (Tok.is(tok::code_completion)) {
-      setCodeCompletionReached();
-      if (CodeComplete)
-        CodeComplete->CodeCompleteInConditionalExclusion();
-      continue;
-    }
+        if (Tok.is(tok::code_completion)) {
+          setCodeCompletionReached();
+          if (CodeComplete)
+            CodeComplete->CodeCompleteInConditionalExclusion();
+          continue;
+        }
 
-    // If this is the end of the buffer, we have an error.
-    if (Tok.is(tok::eof)) {
-      // We don't emit errors for unterminated conditionals here,
-      // Lexer::LexEndOfFile can do that properly.
-      // Just return and let the caller lex after this #include.
-      if (PreambleConditionalStack.isRecording())
-        PreambleConditionalStack.SkipInfo.emplace(
-            HashTokenLoc, IfTokenLoc, FoundNonSkipPortion, FoundElse, ElseLoc);
-      break;
-    }
+        // If this is the end of the buffer, we have an error.
+        if (Tok.is(tok::eof)) {
+          // We don't emit errors for unterminated conditionals here,
+          // Lexer::LexEndOfFile can do that properly.
+          // Just return and let the caller lex after this #include.
+          if (PreambleConditionalStack.isRecording())
+            PreambleConditionalStack.SkipInfo.emplace(HashTokenLoc, IfTokenLoc,
+                                                      FoundNonSkipPortion,
+                                                      FoundElse, ElseLoc);
+          break;
+        }
 
-    // If this token is not a preprocessor directive, just skip it.
-    if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
-      continue;
+        // If this token is not a preprocessor directive, just skip it.
+        if (Tok.isNot(tok::hash) || !Tok.isAtStartOfLine())
+          continue;
+
+        break;
+      }
+    }
+    if (Tok.is(tok::eof))
+      break;
 
     // We just parsed a # character at the start of a line, so we're in
     // directive mode. Tell the lexer this so any newlines we see will be
diff --git a/clang/lib/Lex/PPLexerChange.cpp b/clang/lib/Lex/PPLexerChange.cpp
--- a/clang/lib/Lex/PPLexerChange.cpp
+++ b/clang/lib/Lex/PPLexerChange.cpp
@@ -91,8 +91,19 @@
         CodeCompletionFileLoc.getLocWithOffset(CodeCompletionOffset);
   }
 
-  EnterSourceFileWithLexer(
-      new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile), CurDir);
+  Lexer *TheLexer = new Lexer(FID, *InputFile, *this, IsFirstIncludeOfFile);
+  if (getPreprocessorOpts().DependencyDirectivesForFile &&
+      FID != PredefinesFileID) {
+    if (Optional<FileEntryRef> File = SourceMgr.getFileEntryRefForID(FID)) {
+      if (Optional<ArrayRef<dependency_directives_scan::Directive>>
+              DepDirectives =
+                  getPreprocessorOpts().DependencyDirectivesForFile(*File)) {
+        TheLexer->DepDirectives = *DepDirectives;
+      }
+    }
+  }
+
+  EnterSourceFileWithLexer(TheLexer, CurDir);
   return false;
 }
 
@@ -110,7 +121,9 @@
   CurDirLookup = CurDir;
   CurLexerSubmodule = nullptr;
   if (CurLexerKind != CLK_LexAfterModuleImport)
-    CurLexerKind = CLK_Lexer;
+    CurLexerKind = TheLexer->isDependencyDirectivesLexer()
+                       ? CLK_DependencyDirectivesLexer
+                       : CLK_Lexer;
 
   // Notify the client, if desired, that we are in a new source file.
   if (Callbacks && !CurLexer->Is_PragmaLexer) {
diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp
--- a/clang/lib/Lex/Preprocessor.cpp
+++ b/clang/lib/Lex/Preprocessor.cpp
@@ -377,7 +377,9 @@
 
 void Preprocessor::recomputeCurLexerKind() {
   if (CurLexer)
-    CurLexerKind = CLK_Lexer;
+    CurLexerKind = CurLexer->isDependencyDirectivesLexer()
+                       ? CLK_DependencyDirectivesLexer
+                       : CLK_Lexer;
   else if (CurTokenLexer)
     CurLexerKind = CLK_TokenLexer;
   else
@@ -640,6 +642,9 @@
     case CLK_CachingLexer:
       CachingLex(Tok);
       break;
+    case CLK_DependencyDirectivesLexer:
+      CurLexer->LexDependencyDirectiveToken(Tok);
+      break;
     case CLK_LexAfterModuleImport:
       LexAfterModuleImport(Tok);
       break;
@@ -901,6 +906,9 @@
       CachingLex(Result);
       ReturnedToken = true;
       break;
+    case CLK_DependencyDirectivesLexer:
+      ReturnedToken = CurLexer->LexDependencyDirectiveToken(Result);
+      break;
     case CLK_LexAfterModuleImport:
       ReturnedToken = LexAfterModuleImport(Result);
       break;
diff --git a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
--- a/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
+++ b/clang/lib/Tooling/DependencyScanning/DependencyScanningWorker.cpp
@@ -200,6 +200,17 @@
       // filesystem.
       FileMgr->setVirtualFileSystem(createVFSFromCompilerInvocation(
           ScanInstance.getInvocation(), ScanInstance.getDiagnostics(), DepFS));
+
+      llvm::IntrusiveRefCntPtr<DependencyScanningWorkerFilesystem> LocalDepFS =
+          DepFS;
+      ScanInstance.getPreprocessorOpts().DependencyDirectivesForFile =
+          [LocalDepFS = std::move(LocalDepFS)](FileEntryRef File)
+          -> Optional<ArrayRef<dependency_directives_scan::Directive>> {
+        if (llvm::ErrorOr<EntryRef> Entry =
+                LocalDepFS->getOrCreateFileSystemEntry(File.getName()))
+          return Entry->getDirectiveTokens();
+        return None;
+      };
     }
 
     // Create the dependency collector that will collect the produced